Skip to content

Commit

Permalink
fix: bom detection on source files moved into cjit core
Browse files Browse the repository at this point in the history
Cleaned up main.c a bit more by moving BOM detection, which is only
needed inside cjit_add_source, into the cjit core.

eventually this needs a bit more cleanup to normalize the way files
are loaded
  • Loading branch information
jaromil committed Dec 29, 2024
1 parent 4b13f6f commit eaf08dc
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 97 deletions.
52 changes: 50 additions & 2 deletions src/cjit.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h> // getpid/write
#include <fcntl.h> // open(2)
#include <inttypes.h>
#include <sys/stat.h> // fstat(2)

#define MAX_PATH 260 // rather short paths
#define MAX_STRING 20480 // max 20KiB strings
Expand Down Expand Up @@ -251,9 +254,54 @@ static int has_source_extension(const char *path) {
return (is_source? 1 : -1);
}

// Inspect the first bytes of a file for a Unicode byte-order mark and
// report the file size obtained from fstat(2).
//
// filename: path of the file to inspect
// filesize: out parameter, receives st.st_size; it is written only
//           once both open(2) and fstat(2) have succeeded
//
// returned values:
//  0 : no BOM (this includes files shorter than a BOM, e.g. empty ones)
// -1 : open, fstat or read failed (details are logged through _err)
//  1 : BOM found, UTF16-LE
//  2 : BOM found, UTF16-BE
//  3 : BOM found, UTF8
static int detect_bom(const char *filename,size_t *filesize) {
    // zero-filled so that bytes missing after a short read never
    // match any of the BOM signatures below
    uint8_t bom[3] = {0};
    int saved_errno;
    int fd = open(filename, O_RDONLY | O_BINARY);
    if (fd < 0) {
        // keep errno safe: the first log call may overwrite it
        saved_errno = errno;
        _err("%s: error opening file: %s",__func__,filename);
        _err("%s",strerror(saved_errno));
        return -1;
    }
    struct stat st;
    if (fstat(fd, &st) == -1) {
        saved_errno = errno;
        close(fd);
        _err("%s: error analyzing file: %s",__func__,filename);
        _err("%s",strerror(saved_errno));
        return -1;
    }
    *filesize = (size_t)st.st_size;
    int res = (int)read(fd, bom, sizeof bom);
    // capture errno before close(2) gets a chance to clobber it
    saved_errno = errno;
    close(fd);
    if (res < 0) {
        _err("%s: error reading file: %s",__func__,filename);
        _err("%s",strerror(saved_errno));
        return -1;
    }
    // A short read (0..2 bytes) is not an error: the file simply is
    // too small to hold all three bytes, and the zero padding in bom
    // makes the comparisons below fail safely.
    if (bom[0] == 0xFF && bom[1] == 0xFE) {
        return 1; // UTF-16 LE
    }
    if (bom[0] == 0xFE && bom[1] == 0xFF) {
        return 2; // UTF-16 BE
    }
    if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
        return 3; // UTF-8
    }
    return 0; // No BOM
}

static bool cjit_add_source(CJITState *cjit, const char *path) {
long length = file_size(path);
if (length == -1) return false;
size_t length;
int res = detect_bom(path,&length);
if(res<0) {
_err("Cannot open file: %s",path);
_err("Execution aborted.");
return false;
} else if(res>0) {
_err("UTF BOM detected in file: %s",path);
_err("Encoding is not yet supported, execution aborted.");
return false;
}
FILE *file = fopen(path, "rb");
if (!file) {
_err("%s: fopen error: ", __func__, strerror(errno));
Expand Down
2 changes: 0 additions & 2 deletions src/cjit.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ extern bool extract_assets(CJITState *CJIT);

/////////////
// from file.c
extern int detect_bom(const char *filename);
extern long file_size(const char *filename);
extern char* file_load(const char *filename, unsigned int *len);
extern char *load_stdin();
extern char* dir_load(const char *path);
Expand Down
76 changes: 1 addition & 75 deletions src/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,82 +31,8 @@

#include <ftw.h> // _GNU_SOURCE

int detect_bom(const char *filename) {
uint8_t bom[3];
int res;
int fd = open(filename, O_RDONLY | O_BINARY);
res = read(fd,bom,3);
if (res!=3) {
_err("read error: %s",strerror(errno));
return -1;
}
close(fd);
// _err("%s bom: %x %x %x",filename,bom[0],bom[1],bom[2]);
if (bom[0] == 0xFF && bom[1] == 0xFE) {
return 1; // UTF-16 LE
} else if (bom[0] == 0xFE && bom[1] == 0xFF) {
return 2; // UTF-16 BE
} else if (bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF) {
return 3; // UTF-8
} else {
return 0; // No BOM
}
}

// Add new_path at the end of the ':'-separated list in *stored_path.
// When *stored_path is NULL the list is created as a heap copy of
// new_path; otherwise the existing buffer is grown with realloc.
// Returns true on success; on allocation failure logs an error,
// returns false and leaves the previous list untouched.
bool append_path(char **stored_path, const char *new_path) {
    // TODO: sanitize input checking only path chars are there
    // support both / and \ for windows
    char *current = *stored_path;

    if (current != NULL) {
        // existing list: room for old text + ':' + new text + NUL
        const size_t cur_len = strlen(current);
        const size_t add_len = strlen(new_path);
        char *grown = realloc(current, cur_len + add_len + 2);
        if (!grown) {
            _err("Memory allocation failed");
            return(false);
        }
        grown[cur_len] = ':';
        // the +1 carries the terminating NUL of new_path along
        memcpy(grown + cur_len + 1, new_path, add_len + 1);
        *stored_path = grown;
        return(true);
    }

    // empty list: start it with a private copy of new_path
    const size_t add_len = strlen(new_path);
    char *fresh = malloc(add_len + 1);
    *stored_path = fresh;
    if (!fresh) {
        _err("Memory allocation failed");
        return(false);
    }
    memcpy(fresh, new_path, add_len + 1);
    return(true);
}

// Put new_path in front of the ':'-separated list in *stored_path.
// When *stored_path is NULL the list is created as a heap copy of
// new_path; otherwise a new buffer holding "new_path:old_list" is
// allocated and the old buffer is freed.
// Returns true on success; on allocation failure logs an error,
// returns false and leaves the previous list untouched.
bool prepend_path(char **stored_path, const char *new_path) {
    char *previous = *stored_path;

    if (!previous) {
        // empty list: start it with a private copy of new_path
        const size_t head_len = strlen(new_path);
        char *fresh = malloc(head_len + 1);
        *stored_path = fresh;
        if (!fresh) {
            _err("Memory allocation failed");
            return(false);
        }
        memcpy(fresh, new_path, head_len + 1);
        return(true);
    }

    // existing list: room for new text + ':' + old text + NUL
    const size_t tail_len = strlen(previous);
    const size_t head_len = strlen(new_path);
    char *joined = malloc(tail_len + head_len + 2);
    if (!joined) {
        _err("Memory allocation failed");
        return(false);
    }
    memcpy(joined, new_path, head_len);
    joined[head_len] = ':';
    // the +1 carries the terminating NUL of the old list along
    memcpy(joined + head_len + 1, previous, tail_len + 1);
    free(previous);
    *stored_path = joined;
    return(true);
}

// Function to get the length of a file in bytes
long file_size(const char *filename) {
static long file_size(const char *filename) {
FILE *file = fopen(filename, "rb");
if (file == NULL) {
_err("%s: fopen error: %s",__func__,strerror(errno));
Expand Down
19 changes: 1 addition & 18 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -269,24 +269,7 @@ int main(int argc, char **argv) {
goto endgame;
} else free(stdin_code);
} else { // load any file path
int res = detect_bom(code_path);
// returned values:
// 0 : no BOM, all OK
// <0 : file not found
// 1 : BOM found, UTF16-LE
// 2 : BOM found, UTF16-BE
// 3 : BOM found, UTF8
if(res ==0) {
cjit_add_file(CJIT, code_path);
} else if(res<0) {
_err("Cannot open file: %s",code_path);
_err("Execution aborted.");
goto endgame;
} else {
_err("UTF BOM detected in file: %s",code_path);
_err("Encoding is not yet supported, execution aborted.");
goto endgame;
}
cjit_add_file(CJIT, code_path);
}
}
}
Expand Down

0 comments on commit eaf08dc

Please sign in to comment.