Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Utf-8 check, C++ initial version #463

Merged
merged 4 commits into from
Jun 4, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion engine/src/cmd/basecomputer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ using namespace XMLSupport;
//end for directory thing
extern const char *DamagedCategory;


int BaseComputer:: dirty = 0;

static GFXColor UnsaturatedColor( float r, float g, float b, float a = 1.0f )
Expand Down Expand Up @@ -5980,6 +5979,10 @@ bool BaseComputer::actionConfirmedLoadGame()
if (desc) {
std::string tmp = desc->text();
if (tmp.length() > 0) {
if (!isUtf8SaveGame(tmp)) {
showAlert( tmp + " is not UTF-8, convert it before loading" );
return true;
}
Cockpit *cockpit = player ? _Universe->isPlayerStarship( player ) : 0;
if (player && cockpit) {
UniverseUtil::showSplashScreen( "" );
Expand Down
6 changes: 6 additions & 0 deletions engine/src/cmd/script/director_generic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,11 @@ static void saveStringListPy( int playernum, string mykey, vector< string > name
saveStringList(playernum, mykey, names);
}

/*
 * Python-facing wrapper: forwards the save game name to isUtf8SaveGame()
 * and hands its verdict back unchanged.
 */
static bool isUtf8SaveGamePy( string savegame )
{
    const bool is_utf8 = isUtf8SaveGame( savegame );
    return is_utf8;
}


PYTHON_BEGIN_MODULE( Director )
PYTHON_BEGIN_INHERIT_CLASS( Director, pythonMission, PythonMissionBaseClass, "Mission" )
Expand All @@ -348,6 +353,7 @@ PYTHON_DEFINE_GLOBAL( Director, &eraseSaveStringPy, "eraseSaveString" );
PYTHON_DEFINE_GLOBAL( Director, &clearSaveStringPy, "clearSaveString" );
PYTHON_DEFINE_GLOBAL( Director, &loadStringListPy, "loadStringList" );
PYTHON_DEFINE_GLOBAL( Director, &saveStringListPy, "saveStringList" );
PYTHON_DEFINE_GLOBAL( Director, &isUtf8SaveGamePy, "isUtf8SaveGame" );
PYTHON_END_MODULE( Director )

void InitDirector()
Expand Down
105 changes: 105 additions & 0 deletions engine/src/savegame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
#include "universe.h"
#include "options.h"

#include <iostream>
#include <fstream>
#include <iterator>
#include <vector>

typedef unsigned char BYTE;


using namespace VSFileSystem;
Expand Down Expand Up @@ -174,6 +180,105 @@ void SaveFileCopy( const char *src, const char *dst )
}
}

class Utf8Checker {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit late to the party, but I wonder if you could have used some boost facility instead.
Something like https://www.boost.org/doc/libs/1_48_0/libs/locale/doc/html/charset_handling.html. It's possible to ask it to throw an exception. Other than that, LGTM.

public:
/*
 * Strict UTF-8 validation of a byte buffer.
 *
 * Originally based on
 * https://helloacm.com/how-to-validate-utf-8-encoding-the-simple-utf-8-validation-algorithm/
 * and extended so that it also rejects what the purely structural check let
 * through:
 *   - overlong encodings (lead bytes C0/C1, E0 followed by 80..9F,
 *     F0 followed by 80..8F),
 *   - UTF-16 surrogates U+D800..U+DFFF (ED followed by A0..BF),
 *   - code points above U+10FFFF (F4 followed by 90..BF, lead bytes F5..FF).
 *
 * data: the raw bytes to inspect.
 * Returns true when every byte belongs to a well-formed UTF-8 sequence
 * (an empty buffer is valid), false otherwise, including when the buffer
 * ends in the middle of a multi-byte sequence.
 */
bool validUtf8(const std::vector<BYTE> &data) {
    typedef std::vector<BYTE>::size_type Index;
    const Index size = data.size();
    Index i = 0;
    while (i < size) {
        const unsigned int lead = data[i];
        // 0xxxxxxx: plain ASCII, one byte.
        if (lead < 0x80u) {
            ++i;
            continue;
        }
        // Number of continuation bytes and the permitted range of the FIRST
        // one; the narrowed ranges are what exclude overlongs, surrogates
        // and values beyond U+10FFFF.
        Index trailing = 0;
        unsigned int first_min = 0x80u;
        unsigned int first_max = 0xBFu;
        if (lead >= 0xC2u && lead <= 0xDFu) {
            // 110xxxxx 10xxxxxx (C0 and C1 would be overlong)
            trailing = 1;
        } else if (lead >= 0xE0u && lead <= 0xEFu) {
            // 1110xxxx 10xxxxxx 10xxxxxx
            trailing = 2;
            if (lead == 0xE0u) {
                first_min = 0xA0u;   // below this is an overlong form
            } else if (lead == 0xEDu) {
                first_max = 0x9Fu;   // above this is a surrogate
            }
        } else if (lead >= 0xF0u && lead <= 0xF4u) {
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            trailing = 3;
            if (lead == 0xF0u) {
                first_min = 0x90u;   // below this is an overlong form
            } else if (lead == 0xF4u) {
                first_max = 0x8Fu;   // above this exceeds U+10FFFF
            }
        } else {
            // Stray continuation byte (80..BF) or invalid lead (C0, C1, F5..FF).
            return false;
        }
        // The sequence must fit entirely inside the buffer.
        if (size - i <= trailing) {
            return false;
        }
        const unsigned int first = data[i + 1];
        if (first < first_min || first > first_max) {
            return false;
        }
        // Remaining continuation bytes only need the generic 10xxxxxx shape.
        for (Index k = 2; k <= trailing; ++k) {
            if ((data[i + k] & 0xC0u) != 0x80u) {
                return false;
            }
        }
        i += trailing + 1;
    }
    return true;
}
private:
/*
 * True when bit 7 of `a` is set and bit 6 is clear, i.e. the low byte has
 * the 10xxxxxx shape of a UTF-8 continuation byte.
 */
inline bool is10x(int a) {
    // Keep only bits 7 and 6 and compare against the pattern "10".
    const int top_two_bits = a & 0xC0;
    return top_two_bits == 0x80;
}
};

std::vector<BYTE> readFile(std::string filename)
/*
 * Reads the whole file `filename` in binary mode and returns its bytes.
 *
 * Returns an empty vector when the file cannot be opened. The previous
 * version passed the failed tellg() result (-1) to reserve(), which was
 * converted to a huge unsigned size and threw std::length_error.
 *
 * Adapted from https://stackoverflow.com/a/21802936
 */
{
    std::vector<BYTE> vec;

    std::ifstream file(filename, std::ios::binary);
    if (!file.is_open()) {
        return vec;
    }

    // Measure the file so the buffer can be allocated once up front.
    file.seekg(0, std::ios::end);
    const std::streamoff fileSize = file.tellg();
    // A failed seek leaves the stream in a failed state; reset before reading.
    file.clear();
    file.seekg(0, std::ios::beg);
    if (fileSize > 0) {
        vec.reserve(static_cast<std::vector<BYTE>::size_type>(fileSize));
    }

    // istreambuf_iterator yields raw bytes with no whitespace skipping or
    // formatted extraction, so every byte of the file is preserved.
    vec.assign(std::istreambuf_iterator<char>(file),
               std::istreambuf_iterator<char>());

    // `file` is closed by its destructor.
    return vec;
}

/*
 * Checks whether the save game `filename` is UTF-8.
 *
 * The path is built by appending `filename` to GetSaveDir(); the file's
 * bytes are read with readFile() and validated with Utf8Checker::validUtf8().
 * Returns true when the bytes validate. Otherwise logs the offending path
 * and returns false.
 */
bool isUtf8SaveGame(std::string filename)
{
    const std::string path = GetSaveDir() + filename;
    const std::vector<BYTE> savegame = readFile(path);
    // Stack-allocated: the checker was previously created with `new` and
    // never deleted, leaking on every call.
    Utf8Checker check;
    if (check.validUtf8(savegame)) {
        return true;
    }
    BOOST_LOG_TRIVIAL(fatal) << boost::format("ERROR: save file %1% is not UTF-8") % path;
    return false;
}

class MissionStringDat
{
public:
Expand Down
1 change: 1 addition & 0 deletions engine/src/savegame.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,5 +155,6 @@ const std::string& GetCurrentSaveGame();
std::string SetCurrentSaveGame( std::string newname );
const std::string& GetSaveDir();
void CopySavedShips( std::string filename, int player_num, const std::vector< std::string > &starships, bool load );
bool isUtf8SaveGame(std::string filename);
#endif