Skip to content

Commit

Permalink
adding wildcard to the from-clause-matcher; it's possible to use "*" (…
Browse files Browse the repository at this point in the history
…match all operator); it is part of the specs, and also adds great flexibility

Signed-off-by: Gal Salomon <[email protected]>
  • Loading branch information
galsalomon66 committed Jul 9, 2024
1 parent 4243bae commit b5c9cb3
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 3 deletions.
13 changes: 12 additions & 1 deletion include/s3select.h
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,7 @@ struct s3select : public bsc::grammar<s3select>

json_s3_object = ((S3SELECT_KW(JSON_ROOT_OBJECT)) >> *(bsc::str_p(".") >> json_path_element))[BOOST_BIND_ACTION(push_json_from_clause)];

json_path_element = bsc::lexeme_d[+( bsc::alnum_p | bsc::str_p("_")) ];
json_path_element = bsc::lexeme_d[+( bsc::alnum_p | bsc::str_p("_") | bsc::str_p("*")) ];

object_path = "/" >> *( fs_type >> "/") >> fs_type;

Expand Down Expand Up @@ -3336,6 +3336,17 @@ class json_object : public base_s3object

int push_key_value_into_scratch_area_per_star_operation(s3selectEngine::scratch_area::json_key_value_t& key_value)
{
// Appends key_value to the container returned by m_sa->get_star_operation_cont().
// Always returns 0.
//
// TODO: this is wrong; equal keys should override each other, i.e. the later key defines the value.
// With the input below, 'select * from s3object[*];' keeps pushing new entries even for identical keys.
// Pushing by key-path would avoid that (accumulating "endless" identical keys).
// There could also be a use case where all keys are unique and the star operation must retrieve a huge row;
// for that a limit should be defined (number of unique keys per single retrieved row).
// { "root" : [
// { "c1":"815", "c2":"113" },
// { "c1":"256", "c2":"342" }
// ]
// }

auto&& star_op_cont = m_sa->get_star_operation_cont();
star_op_cont->push_back( key_value );

return 0;
}
Expand Down
29 changes: 28 additions & 1 deletion include/s3select_json_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -645,9 +645,36 @@ class JsonParserHandler : public rapidjson::BaseReaderHandler<rapidjson::UTF8<>,
return true;
}

// Matches a JSON key path against the from-clause, part by part.
//
// _key_path    : the parts of the key path currently being examined.
// _from_clause : the parts of the from-clause; a part equal to "*" is a wildcard.
// p            : binary predicate called as p(key_part, from_clause_part); returns true on a match.
//
// Walks both sequences in lock-step. A from-clause part matches when it is the
// wildcard "*" (the predicate is not called in that case) or when the predicate
// accepts the pair.
// Example: from-clause = a.*.c ; key-path = a.b.c ; the '*' in the second position
// matches 'b', so the comparison continues with the third part.
//
// Returns false as soon as a part does not match, or when the key path ends before
// the from-clause does. Returns true once every from-clause part has been matched;
// an empty from-clause therefore always matches.
// NOTE(review): trailing parts of _key_path beyond the length of _from_clause are
// not inspected, so a key path longer than the from-clause still matches — confirm
// this is intended.
template<typename predicate>
bool from_clause_matcher(std::vector<std::string>& _key_path,
std::vector<std::string>& _from_clause,
predicate p)
{
  auto key_part = _key_path.begin();

  for (auto& clause_part : _from_clause)
  {
    // the key path is shorter than the from-clause
    if (key_part == _key_path.end())
    {
      return false;
    }

    const bool is_wildcard = (clause_part == "*");
    if (!is_wildcard && !p(*key_part, clause_part))
    {
      return false;
    }

    ++key_part;
  }

  return true;
}


// Sets prefix_match to true when from_clause is empty, or when key_path matches from_clause
// (compared part by part with iequal_predicate). Otherwise prefix_match is left unchanged.
// NOTE(review): the two consecutive `if` lines below appear to be the removed (std::equal) and the
// added (from_clause_matcher) sides of this diff; presumably only the second one exists in the
// resulting file — confirm against the repository.
void set_prefix_match(){
if(from_clause.size() == 0 || std::equal(key_path.begin(), key_path.end(), from_clause.begin(), from_clause.end(), iequal_predicate)) {
if(from_clause.size() == 0 || from_clause_matcher(key_path, from_clause, iequal_predicate)) {
prefix_match = true; //it is not a prefix match when this is a key/value; it is a prefix match when this is the key of an array or the key of an object
}
}
Expand Down
2 changes: 1 addition & 1 deletion include/s3select_oper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1157,7 +1157,7 @@ class scratch_area
public:

typedef std::pair<std::vector<std::string>,value> json_key_value_t;
typedef std::vector< json_key_value_t > json_star_op_cont_t;
typedef std::vector< json_key_value_t > json_star_op_cont_t;//TODO should use a std::map(unique-keys)
json_star_op_cont_t m_json_star_operation;

scratch_area():m_upper_bound(-1),parquet_type(false),buff_loc(0),max_json_idx(-1)
Expand Down
7 changes: 7 additions & 0 deletions test/s3select_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3578,6 +3578,13 @@ std::string input_json_data = R"(
input_query = "select _1.c1 from s3object[*].root.nested_obj.nested2;";
run_json_query(input_query.c_str(), input_json_data, result);
ASSERT_EQ(result,expected_result);

//a wildcard in the from-clause skips one path part (it is considered equal to the projection's counterpart)
expected_result=R"(c1_value
)";
input_query = "select _1.c1 from s3object[*].*.nested_obj.*.nested4;";
run_json_query(input_query.c_str(), input_json_data, result);
ASSERT_EQ(result,expected_result);

}

Expand Down

0 comments on commit b5c9cb3

Please sign in to comment.