Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor decode: remove pyyjson_op IR #7

Merged
merged 14 commits into from
Dec 19, 2024
932 changes: 313 additions & 619 deletions src/decode/decode.c

Large diffs are not rendered by default.

174 changes: 33 additions & 141 deletions src/decode/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,99 +3,10 @@
#include "pyyjson.h"
#include <assert.h>

// Op codes: the kind of value an op describes, stored in the low
// PYYJSON_OP_BITCOUNT_MAX bits of an op word.
// Hot-spot note from the author (presumably ordered by expected frequency --
// TODO confirm): string -> float, int -> array, dict -> null, false, true.
// Least common: uint, nan, inf.
#define PYYJSON_NO_OP (0)
#define PYYJSON_OP_STRING (1)
#define PYYJSON_OP_NUMBER (2)
#define PYYJSON_OP_CONTAINER (3)
#define PYYJSON_OP_CONSTANTS (4)
#define PYYJSON_OP_NAN_INF (5)
// Mask selecting the op-code bits: 7 = (1 << 3) - 1, enough to cover codes 0~5.
#define PYYJSON_OP_MASK (7)
// Number of low bits reserved for the op code: 3 bits cover codes 0~5.
#define PYYJSON_OP_BITCOUNT_MAX (3)
// First value above the op-code bits: 1 << 3 = 8. Flag values start here.
#define PYYJSON_OP_HIGHER_START (1 << PYYJSON_OP_BITCOUNT_MAX)
// Per-op flags. Every flag value is shifted left by PYYJSON_OP_BITCOUNT_MAX so
// that it sits above the op-code bits and can be OR-ed with an op code.
//
// String flags: a 2-bit character-width selector (values 0..3) ...
#define PYYJSON_STRING_FLAG_ASCII (0 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_STRING_FLAG_LATIN1 (1 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_STRING_FLAG_UCS2 (2 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_STRING_FLAG_UCS4 (3 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_STRING_FLAG_UCS_TYPE_MASK (3 << PYYJSON_OP_BITCOUNT_MAX)
// ... plus one independent bit (the next bit above the selector) marking the
// string as an object key.
#define PYYJSON_STRING_FLAG_OBJ_KEY (4 << PYYJSON_OP_BITCOUNT_MAX)
// Number flags: 2-bit selector for float / signed int / unsigned int.
#define PYYJSON_NUM_FLAG_FLOAT (0 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NUM_FLAG_INT (1 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NUM_FLAG_UINT (2 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NUM_FLAG_MASK (3 << PYYJSON_OP_BITCOUNT_MAX)
// Container flags: 1-bit selector, array (0) or dict (1).
#define PYYJSON_CONTAINER_FLAG_ARRAY (0 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_CONTAINER_FLAG_DICT (1 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_CONTAINER_FLAG_MASK (1 << PYYJSON_OP_BITCOUNT_MAX)
// Constant flags: 2-bit selector for null / false / true.
#define PYYJSON_CONSTANTS_FLAG_NULL (0 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_CONSTANTS_FLAG_FALSE (1 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_CONSTANTS_FLAG_TRUE (2 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_CONSTANTS_FLAG_MASK (3 << PYYJSON_OP_BITCOUNT_MAX)
// NaN/Inf flags: one bit selects NaN (0) or Inf (1); SIGNED is a separate bit.
// MASK_WITHOUT_SIGNED keeps only the NaN/Inf selector bit.
#define PYYJSON_NAN_INF_FLAG_NAN (0 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NAN_INF_FLAG_INF (1 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NAN_INF_FLAG_SIGNED (2 << PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_NAN_INF_FLAG_MASK_WITHOUT_SIGNED (1 << PYYJSON_OP_BITCOUNT_MAX)
// Flag interpretation helpers.
// PYYJSON_MATCH_FLAG(x) expands to a `case` label (no trailing colon) whose
// value is flag x shifted back down by PYYJSON_OP_BITCOUNT_MAX.
// PYYJSON_GET_FLAG(x, mask) masks x and shifts the result down the same way,
// so its value is comparable with PYYJSON_MATCH_FLAG labels.
// NOTE(review): presumably used together as
// `switch (PYYJSON_GET_FLAG(op, MASK)) { PYYJSON_MATCH_FLAG(FLAG): ... }` -- confirm against decode.c.
#define PYYJSON_MATCH_FLAG(x) case ((x) >> PYYJSON_OP_BITCOUNT_MAX)
#define PYYJSON_GET_FLAG(x, mask) (((x) & (mask)) >> PYYJSON_OP_BITCOUNT_MAX)
// end flags
// Storage type of an op word (op code OR-ed with its flags): 32 bits.
typedef uint32_t op_type;
// Member declaration placed first in every op struct, so any op pointer can
// be cast to pyyjson_op_base * to reach the op word.
#define PYYJSON_OP_HEAD pyyjson_op_base op_base;
// Read the op word of the op struct that _op points to.
#define PYYJSON_READ_OP(_op) (((pyyjson_op_base *) (_op))->op)
// Store _code as the op word of the op struct that _ptr points to.
// Wrapped in do { } while (0) so it behaves as a single statement.
#define PYYJSON_WRITE_OP(_ptr, _code) \
do { \
((pyyjson_op_base *) (_ptr))->op = (_code); \
} while (0)


// Trailing padding member for pyyjson_op: 4 bytes when PYYJSON_64BIT is
// defined, nothing otherwise.
#ifdef PYYJSON_64BIT
#define PYYJSON_OP_PADDING char pad[4];
#else
#define PYYJSON_OP_PADDING
#endif

// Common header of every op struct: holds only the op word.
// size = 4. This should not be used directly; it is embedded through
// PYYJSON_OP_HEAD and reached through PYYJSON_READ_OP / PYYJSON_WRITE_OP.
typedef struct pyyjson_op_base {
op_type op;
} pyyjson_op_base;

// Smallest op: the header plus optional padding, with no payload.
// size = 4 / 8 (without / with PYYJSON_64BIT padding)
typedef struct pyyjson_op {
PYYJSON_OP_HEAD
PYYJSON_OP_PADDING
} pyyjson_op;

// Number op: the header followed by an 8-byte payload held in a union.
// NOTE(review): presumably the PYYJSON_NUM_FLAG_* bits in the op word select
// which union member is valid (INT -> i, UINT -> u, FLOAT -> f) -- confirm.
// size = 12 / 16
typedef struct pyyjson_number_op {
PYYJSON_OP_HEAD
union {
int64_t i;
uint64_t u;
double f;
} data;
} pyyjson_number_op;

// String op: the header followed by a data pointer and a length.
// NOTE(review): the unit of `len` (bytes vs. code points) and the ownership of
// `data` are not visible here -- confirm against decode.c.
// size = 12 / 24
typedef struct pyyjson_string_op {
PYYJSON_OP_HEAD
char *data;
Py_ssize_t len;
} pyyjson_string_op;

// Container op: the header followed by a length.
// NOTE(review): presumably `len` is the element count of the array/dict
// chosen by PYYJSON_CONTAINER_FLAG_* -- confirm.
// size = 8 / 16
typedef struct pyyjson_container_op {
PYYJSON_OP_HEAD
Py_ssize_t len;
} pyyjson_container_op;
// String character-width identifiers.
// NOTE(review): LATIN1/UCS2/UCS4 = 1/2/4 looks like bytes per character (the
// same numbering as CPython's PyUnicode kinds), with ASCII given the distinct
// value 0 -- confirm. Note the jump from 2 to 4: there is no value 3.
#define PYYJSON_STRING_TYPE_ASCII 0
#define PYYJSON_STRING_TYPE_LATIN1 1
#define PYYJSON_STRING_TYPE_UCS2 2
#define PYYJSON_STRING_TYPE_UCS4 4


/** 16/32/64-bit vector */
Expand All @@ -109,22 +20,24 @@ typedef union v32_uni { v32 v; u32 u; } v32_uni;
typedef union v64_uni { v64 v; u64 u; } v64_uni;


// Declaration only; the definition is not in this header.
// NOTE(review): presumably builds the resulting Python object from an op
// sequence, with obj_stack_maxsize bounding an internal object stack -- confirm.
PyObject *pyyjson_op_loads(pyyjson_op *op_sequence, size_t obj_stack_maxsize);
// Bookkeeping for a stack of PyObject * slots used while decoding.
// NOTE(review): field roles below are inferred from their names -- confirm
// against decode.c.
typedef struct DecodeObjStackInfo {
// presumably the slot where the next decoded object will be written
PyObject **cur_write_result_addr;
// presumably the first slot of the stack buffer
PyObject **result_stack;
// presumably the end of the stack buffer (whether inclusive is not visible here)
PyObject **result_stack_end;
} DecodeObjStackInfo;

// Exception object declared here and defined in another translation unit.
extern PyObject *JSONDecodeError;
// Forward declaration: this header uses DecodeCtnWithSize only through
// pointers, so the full definition is not needed here.
typedef struct DecodeCtnWithSize DecodeCtnWithSize;

// Cache entry type: a PyObject pointer.
typedef PyObject *pyyjson_cache_type;
// Fixed-size cache table with PYYJSON_KEY_CACHE_SIZE entries, defined elsewhere.
// NOTE(review): presumably caches decoded object-key strings -- confirm.
extern pyyjson_cache_type AssociativeKeyCache[PYYJSON_KEY_CACHE_SIZE];
// Bookkeeping for a stack of DecodeCtnWithSize entries used while decoding.
// NOTE(review): field roles below are inferred from their names -- confirm
// against decode.c.
typedef struct DecodeCtnStackInfo {
// presumably the current (innermost) container entry
DecodeCtnWithSize *ctn;
// presumably the first entry of the stack buffer
DecodeCtnWithSize *ctn_start;
// presumably the end of the stack buffer (whether inclusive is not visible here)
DecodeCtnWithSize *ctn_end;
} DecodeCtnStackInfo;

// static assertions
// Each payload-carrying op struct must be a whole multiple of sizeof(pyyjson_op).
// NOTE(review): presumably so an op sequence can be stepped in pyyjson_op units -- confirm.
static_assert((sizeof(pyyjson_number_op) % sizeof(pyyjson_op)) == 0, "size of pyyjson_number_op must be multiple of size of pyyjson_op");
static_assert((sizeof(pyyjson_string_op) % sizeof(pyyjson_op)) == 0, "size of pyyjson_string_op must be multiple of size of pyyjson_op");
static_assert((sizeof(pyyjson_container_op) % sizeof(pyyjson_op)) == 0, "size of pyyjson_container_op must be multiple of size of pyyjson_op");
// The code requires these basic types to be exactly 8 bytes wide.
static_assert(sizeof(long long) == 8, "size of long long must be 8 bytes");
static_assert(sizeof(unsigned long long) == 8, "size of unsigned long long must be 8 bytes");
static_assert(sizeof(double) == 8, "size of double must be 8 bytes");

// Exception object declared here and defined in another translation unit.
extern PyObject *JSONDecodeError;

// Cache entry type: a PyObject pointer.
typedef PyObject *pyyjson_cache_type;


/*==============================================================================
Expand Down Expand Up @@ -326,44 +239,45 @@ force_inline u32 byte_load_4(const void *src) {
* These functions are used by the JSON reader to read literals and comments.
*============================================================================*/

// Forward declarations. read_inf_or_nan() calls these after an Inf / NaN
// literal has been recognized and returns their result unchanged.
// `is_signed` receives the caller's `sign` argument.
// NOTE(review): presumably each one stores the corresponding Python float
// through decode_obj_stack_info and returns false on failure -- confirm.
force_inline bool pyyjson_decode_inf(DecodeObjStackInfo *restrict decode_obj_stack_info, bool is_signed);

force_inline bool pyyjson_decode_nan(DecodeObjStackInfo *restrict decode_obj_stack_info, bool is_signed);

/**
 * Read 'true' literal, '*cur' should be 't'.
 *
 * @param ptr In/out cursor into the JSON input. When the literal matches,
 *            *ptr is advanced by 4 bytes; otherwise it is left unchanged.
 * @return true when byte_match_4 reports that the bytes at *ptr match "true",
 *         false otherwise.
 *
 * NOTE(review): two signature lines appear back to back below (read_true and
 * _read_true). This looks like the before/after of a rename -- confirm which
 * one is live.
 */
force_inline bool read_true(u8 **ptr) {
force_inline bool _read_true(u8 **ptr) {
u8 *cur = (u8 *)*ptr;
/* `end` aliases `ptr`, so writing *end moves the caller's cursor. */
u8 **end = (u8 **)ptr;
if (likely(byte_match_4(cur, "true"))) {
*end = cur + 4;
return true;
}
return false;
}

/**
 * Read 'false' literal, '*cur' should be 'f'.
 *
 * The first byte is not re-checked here: only the four bytes after it are
 * compared against "alse", so the caller must already have seen the 'f'.
 *
 * @param ptr In/out cursor into the JSON input. When the literal matches,
 *            *ptr is advanced by 5 bytes; otherwise it is left unchanged.
 * @return true when byte_match_4 reports that the bytes at *ptr + 1 match
 *         "alse", false otherwise.
 *
 * NOTE(review): two signature lines appear back to back below (read_false and
 * _read_false). This looks like the before/after of a rename -- confirm which
 * one is live.
 */
force_inline bool read_false(u8 **ptr) {
force_inline bool _read_false(u8 **ptr) {
u8 *cur = (u8 *)*ptr;
/* `end` aliases `ptr`, so writing *end moves the caller's cursor. */
u8 **end = (u8 **)ptr;
if (likely(byte_match_4(cur + 1, "alse"))) {
*end = cur + 5;
return true;
}
return false;
}

/**
 * Read 'null' literal, '*cur' should be 'n'.
 *
 * @param ptr In/out cursor into the JSON input. When the literal matches,
 *            *ptr is advanced by 4 bytes; otherwise it is left unchanged.
 * @return true when byte_match_4 reports that the bytes at *ptr match "null",
 *         false otherwise.
 *
 * NOTE(review): two signature lines appear back to back below (read_null and
 * _read_null). This looks like the before/after of a rename -- confirm which
 * one is live.
 */
force_inline bool read_null(u8 **ptr) {
force_inline bool _read_null(u8 **ptr) {
u8 *cur = (u8 *)*ptr;
/* `end` aliases `ptr`, so writing *end moves the caller's cursor. */
u8 **end = (u8 **)ptr;
if (likely(byte_match_4(cur, "null"))) {
*end = cur + 4;
return true;
}
return false;
}

/** Read 'Inf' or 'Infinity' literal (ignoring case). */
force_inline bool read_inf(bool sign, u8 **ptr, pyyjson_op **op) {
force_inline bool _read_inf(bool sign, u8 **ptr) {
u8 *hdr = (u8 *)(*ptr - sign);
u8 *cur = (u8 *)*ptr;
u8 **end = (u8 **)ptr;
Expand All @@ -380,26 +294,13 @@ force_inline bool read_inf(bool sign, u8 **ptr, pyyjson_op **op) {
cur += 3;
}
*end = cur;
// if (pre) {
// /* add null-terminator for previous raw string */
// if (*pre) **pre = '\0';
// *pre = cur;
// val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
// val->uni.str = (const char *)hdr;
// } else {
// val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
// val->uni.u64 = f64_raw_get_inf(sign);
// }
pyyjson_op* op_inf = *op;
PYYJSON_WRITE_OP(op_inf, PYYJSON_OP_NAN_INF | PYYJSON_NAN_INF_FLAG_INF | (sign ? PYYJSON_NAN_INF_FLAG_SIGNED : 0));
*op = (op_inf + 1);
return true;
}
return false;
}

/** Read 'NaN' literal (ignoring case). */
force_inline bool read_nan(bool sign, u8 **ptr, pyyjson_op **op) {
force_inline bool _read_nan(bool sign, u8 **ptr) {
u8 *hdr = (u8 *)(*ptr - sign);
u8 *cur = (u8 *)*ptr;
u8 **end = (u8 **)ptr;
Expand All @@ -408,28 +309,19 @@ force_inline bool read_nan(bool sign, u8 **ptr, pyyjson_op **op) {
(cur[2] == 'N' || cur[2] == 'n')) {
cur += 3;
*end = cur;
// if (pre) {
// /* add null-terminator for previous raw string */
// if (*pre) **pre = '\0';
// *pre = cur;
// val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
// val->uni.str = (const char *)hdr;
// } else {
// val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
// val->uni.u64 = f64_raw_get_nan(sign);
// }
pyyjson_op* op_nan = *op;
PYYJSON_WRITE_OP(op_nan, PYYJSON_OP_NAN_INF | PYYJSON_NAN_INF_FLAG_NAN | (sign ? PYYJSON_NAN_INF_FLAG_SIGNED : 0));
*op = (op_nan + 1);
return true;
}
return false;
}

/**
 * Read 'Inf', 'Infinity' or 'NaN' literal (ignoring case).
 *
 * Tries the Inf reader first, then the NaN reader; returns false when neither
 * one recognizes a literal at *ptr.
 *
 * NOTE(review): two variants are interleaved below. The first signature and
 * the two one-line `if`s pass a pyyjson_op ** cursor to read_inf / read_nan.
 * The second signature takes a DecodeObjStackInfo * and, once _read_inf /
 * _read_nan succeeds, returns the result of pyyjson_decode_inf /
 * pyyjson_decode_nan. This looks like the before/after of a refactor --
 * confirm which lines are live.
 */
force_inline bool read_inf_or_nan(bool sign, u8 **ptr, pyyjson_op **op) {
if (read_inf(sign, ptr, op)) return true;
if (read_nan(sign, ptr, op)) return true;
force_inline bool read_inf_or_nan(DecodeObjStackInfo *decode_obj_stack_info, bool sign, u8 **ptr) {
if (_read_inf(sign, ptr)) {
/* Literal recognized: the decode step's result becomes the return value. */
return pyyjson_decode_inf(decode_obj_stack_info, sign);
}
if (_read_nan(sign, ptr)) {
/* Literal recognized: the decode step's result becomes the return value. */
return pyyjson_decode_nan(decode_obj_stack_info, sign);
}
return false;
}

Expand Down
Loading
Loading