Skip to content

Commit

Permalink
Test for checking results aws
Browse files Browse the repository at this point in the history
Added a query generator which generates equivalent queries for AWS
and Ceph. After running these queries, their results are matched.

Signed-off-by: Girjesh Rajoria <[email protected]>
  • Loading branch information
grajoria committed Mar 24, 2021
1 parent a8e6ec6 commit ad0a46d
Show file tree
Hide file tree
Showing 4 changed files with 409 additions and 0 deletions.
26 changes: 26 additions & 0 deletions test/queries_generator/generate_aws_cmds.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include <iostream>
#include <fstream>

using namespace std;

/*
 * Turns the queries in aws_queries.txt into a shell script, aws_cmds.sh.
 *
 * The script creates the aws_results directory and then contains one
 * "aws s3api select-object-content" command per input line; the i-th
 * query (1-based) writes its result to aws_results/output<i>.csv.
 * The bucket name and object key are read interactively from stdin.
 *
 * Returns 0 on success and 1 when a file cannot be opened, the prompts
 * cannot be read, or writing the script fails. A non-zero status lets a
 * caller running under "set -e" stop instead of executing an empty or
 * truncated script.
 */
int main()
{
    fstream query_file, cmd_file;
    query_file.open("aws_queries.txt", ios::in);
    if (!query_file.is_open())
    {
        cerr << "error: cannot open aws_queries.txt for reading" << endl;
        return 1;
    }
    cmd_file.open("aws_cmds.sh", ios::out);
    if (!cmd_file.is_open())
    {
        cerr << "error: cannot open aws_cmds.sh for writing" << endl;
        return 1;
    }
    cmd_file << "#!/bin/sh\nset -x\nset -e\n\n";
    cmd_file << "mkdir -p aws_results\n";
    string bucket, csv_file, query, aws_cmd;
    cout << "Enter bucket name: ";
    if (!(cin >> bucket))
    {
        cerr << "error: no bucket name given" << endl;
        return 1;
    }
    cout << "Enter file name: ";
    if (!(cin >> csv_file))
    {
        cerr << "error: no file name given" << endl;
        return 1;
    }
    // The query is placed inside double quotes in the generated command,
    // so it must not itself contain characters the shell expands there.
    for (int i = 1; getline(query_file, query); i++)
    {
        aws_cmd = "aws s3api select-object-content --bucket " + bucket + " --key " + csv_file + " --expression-type \'SQL\' --input-serialization \'{\"CSV\": {}, \"CompressionType\": \"NONE\"}\' --output-serialization \'{\"CSV\": {}}\' --profile openshift-dev --expression \"" + query + "\" \"aws_results/output" + to_string(i) + ".csv\"";
        cmd_file << aws_cmd << '\n';
    }
    cmd_file.close();
    query_file.close();
    // close() sets failbit when the final flush fails (e.g. disk full).
    if (cmd_file.fail())
    {
        cerr << "error: failed while writing aws_cmds.sh" << endl;
        return 1;
    }
    return 0;
}
325 changes: 325 additions & 0 deletions test/queries_generator/queries_generator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,325 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <bits/stdc++.h>
#define NUM_COLUMN 3

using namespace std;

// Kind of expression a generator call is asked to produce.
// Values are consecutive from 0, so "rand() % 4" selects one of the
// first four enumerators.
enum Return_type
{
    INTEGER = 0,   // integer-valued expression
    STRING,        // string-valued expression
    TIMESTAMP,     // timestamp-valued expression
    MIX_COL_NUM,   // arithmetic over columns and number literals
    COLUMN,        // a single column cast to int
    NUMBER         // a single number literal
};

// Returns one of the arithmetic operator characters '+', '-', '*', '/',
// selected with rand().
auto random_arth_op = []()
{
    const std::string operators = "+-*/";
    const auto pick = rand() % operators.size();
    return operators[pick];
};

// Returns one comparison operator, as a string, selected with rand().
auto random_compare_op = []()
{
    const vector<string> comparisons = {
        ">", "<", ">=", "<=", "==", "!="
    };
    const auto pick = rand() % comparisons.size();
    return comparisons[pick];
};

// Returns one date-part keyword ("year" .. "second"), selected with rand().
auto random_date_part = []()
{
    const vector<string> parts = {
        "year", "month", "day", "hour", "minute", "second"
    };
    const auto pick = rand() % parts.size();
    return parts[pick];
};

/*auto random_date_part_extract = []()
{vector<string> op={"year", "month", "day", "hour", "minute", "second",
"timezone_hour", "timezone_minute"};
return op[ rand() % op.size() ];
};*/

/*
 * Builds a random timestamp literal of the form
 *   YYYY-MM-DDThh:mm:ss.<fraction>Z
 * with the year in 1900..1999 and the day capped at 28 so that every
 * month/day combination exists. Month, day, hours, minutes and seconds
 * are zero-padded to two digits; the fraction (0..999999) is written
 * without padding.
 *
 * The text is stored in aws_expr and also returned.
 */
string random_timestamp_string(string& aws_expr)
{
    // Draw the fields in the order they appear in the text.
    const int yr = rand() % 100 + 1900;
    const int mon = 1 + rand() % 12;
    const int dom = 1 + rand() % 28;
    const int hh = rand() % 24;
    const int mm = rand() % 60;
    const int ss = rand() % 60;
    const int frac = rand() % 1000000;

    ostringstream text;
    // The fill character is sticky; the width applies to the next field only.
    text << std::setfill('0');
    text << yr
         << '-' << std::setw(2) << mon
         << '-' << std::setw(2) << dom
         << 'T' << std::setw(2) << hh
         << ':' << std::setw(2) << mm
         << ':' << std::setw(2) << ss
         << '.' << frac << 'Z';

    aws_expr = text.str();
    return aws_expr;
}

/*
 * Builds a random timestamp format string by concatenating between 0 and 9
 * tokens drawn from a fixed list of format specifiers and separators.
 * Every token carries a trailing space, so the result is either empty or
 * ends with a space.
 */
string random_tm_format_string()
{
    const vector<string> tokens = {
        "yyyyy ", "yyyy ", "yyy ", "yy ", "y ",
        "MMMMM ", "MMMM ", "MMM ", "MM ", "M ",
        "dd ", "d ", "a ",
        "hh ", "h ", "HH ", "H ",
        "mm ", "m ", "ss ", "s ",
        "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ",
        "n ", ": ", "- ", " "
    };

    string format;
    // The token count is drawn first, then one rand() per token.
    for (int remaining = rand() % 10; remaining > 0; --remaining)
    {
        format += tokens[rand() % tokens.size()];
    }
    return format;
}

/*
 * Picks a random column reference _1 .. _NUM_COLUMN.
 *
 * aws_expr receives the AWS form "cast(_N as int)"; the return value is
 * the Ceph form "int(_N)" for the same column.
 */
string random_col(string& aws_expr)
{
    const string column = "_" + to_string(1 + (rand() % NUM_COLUMN));
    aws_expr = "cast(" + column + " as int)";
    return "int(" + column + ")";
}

/*
 * Picks a random integer literal in 1..10.
 *
 * aws_expr receives the bare literal (e.g. "7"); the return value is the
 * Ceph form "int(7)" for the same number.
 */
string random_number(string& aws_expr)
{
    aws_expr = to_string(rand() % 10 + 1);
    return "int(" + aws_expr + ")";
}

/*
 * Builds a random arithmetic expression over number literals only.
 *
 * depth     recursion depth; a value <= 0 yields a single literal, and
 *           each extra level joins two sub-expressions with a random
 *           operator, giving 2^depth literals in total.
 * aws_expr  receives the AWS spelling of the expression.
 * Returns the Ceph spelling of the same expression.
 *
 * Negative depths are treated like 0. Stopping only at exactly 0 would
 * let a negative depth recurse without bound.
 */
string random_num_expr(int depth, string& aws_expr)
{
    if (depth <= 0)
    {
        return random_number(aws_expr);
    }

    string aws_expr1, aws_expr2, ceph_expr, op;
    op = random_arth_op();
    // Each half fills its own AWS string, so both spellings stay paired
    // whichever operand the compiler evaluates first.
    ceph_expr = random_num_expr(depth-1, aws_expr1) + op +
        random_num_expr(depth-1, aws_expr2);
    aws_expr = aws_expr1 + op + aws_expr2;
    return ceph_expr;
}

/*
 * Builds a random arithmetic expression whose leaves are either column
 * references or number literals (chosen with equal probability).
 *
 * depth     recursion depth; a value <= 0 yields a single leaf, and each
 *           extra level joins two sub-expressions with a random operator.
 * aws_expr  receives the AWS spelling of the expression.
 * Returns the Ceph spelling of the same expression.
 *
 * Negative depths are treated like 0. Stopping only at exactly 0 would
 * let a negative depth recurse without bound.
 */
string random_num_col_expr(int depth, string& aws_expr)
{
    if (depth <= 0)
    {
        if ((rand() % 2) == 0)
        {
            return random_col(aws_expr);
        }
        return random_number(aws_expr);
    }

    string aws_expr1, aws_expr2, ceph_expr, op;
    op = random_arth_op();
    // Each half fills its own AWS string, so both spellings stay paired
    // whichever operand the compiler evaluates first.
    ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op +
        random_num_col_expr(depth-1, aws_expr2);
    aws_expr = aws_expr1 + op + aws_expr2;
    return ceph_expr;
}

/*
 * Builds one random query expression in two spellings at once.
 *
 * depth      remaining nesting depth; 0 produces a leaf expression.
 * input_str  text used (wrapped in single quotes) for STRING leaves.
 * type       a Return_type value selecting the kind of expression.
 * aws_expr   out-parameter that receives the AWS spelling.
 * Returns the Ceph spelling of the same expression.
 *
 * Sub-expressions are generated recursively with depth-1. Each recursive
 * call fills its own local aws_expr1/2/3, and the AWS spelling is assembled
 * in a separate statement afterwards, so the two spellings always describe
 * the same expression.
 */
string random_query_expr(int depth, string& input_str, int type, string& aws_expr)
{
string ceph_expr;
// Leaf level: emit a literal (or a single column/number for MIX_COL_NUM).
// NOTE: COLUMN and NUMBER have no case here, so at depth 0 those types
// return an empty string and leave aws_expr untouched.
if (depth == 0)
{
switch (type)
{
case INTEGER:
ceph_expr = random_number(aws_expr);
break;
case STRING:
ceph_expr = "\'" + input_str + "\'";
aws_expr = "\'" + input_str + "\'";
break;
case MIX_COL_NUM:
ceph_expr = random_num_col_expr(depth, aws_expr);
break;
case TIMESTAMP:
// random_timestamp_string() stores the raw timestamp text in aws_expr;
// the next statement wraps that same text for the AWS spelling.
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')";
aws_expr = "to_timestamp(\'" + aws_expr + "\')";
break;
}
return ceph_expr;
}

// Holds the randomly chosen switch selector; it is not read afterwards.
int option;
if (type == INTEGER) //return type is int
{
string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2;
// Cases 0-4: an aggregate over "column op1 expr", combined via op2 with a
// numbers-only expression. Cases 5-8: string-length and date functions.
switch (option = rand() % 9)
{
case 0:
// avg: the whole result is converted to int (int(...) vs cast(... as int)).
ceph_col = random_col(aws_col);
op1 = random_arth_op();
op2 = random_arth_op();
ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")";
aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 +
") as int)";
break;
case 1:
ceph_col = random_col(aws_col);
op1 = random_arth_op();
op2 = random_arth_op();
ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
break;
case 2:
ceph_col = random_col(aws_col);
op1 = random_arth_op();
op2 = random_arth_op();
ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " +
op2 + " " + random_num_expr(depth-1, aws_expr2);
aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
break;
case 3:
ceph_col = random_col(aws_col);
op1 = random_arth_op();
op2 = random_arth_op();
ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " +
op2 + " " + random_num_expr(depth-1, aws_expr2);
aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
break;
case 4:
ceph_col = random_col(aws_col);
op1 = random_arth_op();
op2 = random_arth_op();
ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
break;
case 5:
ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING,
aws_expr1) + ")";
aws_expr = "char_length(" + aws_expr1 + ")";
break;
case 6:
ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING,
aws_expr1) + ")";
aws_expr = "character_length(" + aws_expr1 + ")";
break;
case 7:
// op1 is reused here to hold a date-part keyword rather than an operator.
op1 = random_date_part();
ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str,
TIMESTAMP, aws_expr1) + ")";
aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")";
break;
case 8:
op1 = random_date_part();
ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str,
TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str,
TIMESTAMP, aws_expr2) + ")";
aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")";
break;
}
}
else if (type == STRING) // return type is string
{
string aws_expr1, aws_expr2, aws_expr3;
switch (option = rand() % 4)
{
case 0:
ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
")";
aws_expr = "lower(" + aws_expr1 + ")";
break;
case 1:
ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
")";
aws_expr = "upper(" + aws_expr1 + ")";
break;
case 2:
// substring(string, integer expression, integer expression)
ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " +
random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")";
aws_expr = "substring(" + aws_expr1 + ", " + aws_expr2 + ", " + aws_expr3 + ")";
break;
case 3:
// aws_expr2 holds the random format string, used verbatim in both spellings.
aws_expr2 = random_tm_format_string();
ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1)
+ ", \'" + aws_expr2 + "\')";
aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')";
break;
}
}
else if (type == TIMESTAMP) // return type is TIMESTAMP
{
string aws_expr1, aws_expr2, date_part;
switch (option = rand() % 2)
{
case 0:
// date_add(part, number, timestamp): the Ceph side gets the int(N) form
// returned by random_number(), the AWS side gets the bare number.
date_part = random_date_part();
ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " +
random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")";
aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")";
break;
case 1:
// Ends the recursion early with a timestamp literal.
ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')";
aws_expr = "to_timestamp(\'" + aws_expr1 + "\')";
break;
}
}
else if (type == MIX_COL_NUM)
{
ceph_expr = random_num_col_expr(depth-1, aws_expr);
}
else if (type == COLUMN) // return type integer column number
{
ceph_expr = random_col(aws_expr);
}
else if (type == NUMBER) // return type random number
{
ceph_expr = random_number(aws_expr);
}
else
{
// Unknown type value: both spellings are set to the text "error".
aws_expr = "error";
ceph_expr = "error";
}
return ceph_expr;
}

/*
 * Query generator entry point.
 *
 * Prompts for the number of queries and their nesting depth, then writes
 * that many random queries, one per line, in two equivalent spellings:
 *   queries.txt      - "select <expr> from stdin;"     (Ceph spelling)
 *   aws_queries.txt  - "select <expr> from s3object;"  (AWS spelling)
 * Line N of both files describes the same query.
 *
 * Returns 0 on success and 1 when an output file cannot be opened or
 * either prompt is not answered with a non-negative integer. Negative
 * values are rejected because a negative count would keep the generation
 * loop running and a negative depth never reaches the depth-0 leaf case
 * of the expression generators.
 */
int main()
{
    srand(time(0));
    int reps = 0, depth = 0;
    fstream query_file, aws_query_file;
    query_file.open("queries.txt", ios::out);
    aws_query_file.open("aws_queries.txt", ios::out);
    if (!query_file.is_open() || !aws_query_file.is_open())
    {
        cerr << "error: cannot open queries.txt / aws_queries.txt for writing" << endl;
        return 1;
    }
    string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## ";
    cout << "Enter number of quries to be generated: ";
    if (!(cin >> reps) || reps < 0)
    {
        cerr << "error: number of queries must be a non-negative integer" << endl;
        return 1;
    }
    cout << "Enter depth of queries to be generated: ";
    if (!(cin >> depth) || depth < 0)
    {
        cerr << "error: depth must be a non-negative integer" << endl;
        return 1;
    }
    for (; reps > 0; reps--)
    {
        string aws_expr;
        int type;
        string ceph_query = "select ";
        string aws_query = "select ";
        /* Disabled: emit several comma-separated projections per query.
        int projection = rand() % 4;
        while (projection > 1)
        {
            type = rand() % 4;
            ceph_query = ceph_query + random_query_expr(depth, input_str,
                    type, aws_expr) + ", ";
            aws_query = aws_query + aws_expr + ", ";
            projection--;
        }*/
        // 0..3 selects INTEGER, STRING, TIMESTAMP or MIX_COL_NUM.
        type = rand() % 4;
        ceph_query = ceph_query + random_query_expr(depth, input_str, type,
                aws_expr) + " from stdin;";
        aws_query = aws_query + aws_expr + " from s3object;";
        query_file << ceph_query << '\n';
        aws_query_file << aws_query << '\n';
    }
    query_file.close();
    aws_query_file.close();
    return 0;
}

13 changes: 13 additions & 0 deletions test/queries_generator/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/sh
# Builds both generator programs, generates the queries, turns the AWS
# queries into a command script and finally runs that script.
# Both generators prompt on stdin, so this script is interactive.

# -x: echo each command before running it; -e: stop at the first failure.
set -x
set -e

# Compile the two helper programs from the sources in this directory.
g++ -o queries_generator queries_generator.cpp
g++ -o generate_aws_cmds generate_aws_cmds.cpp

# Prompts for query count and depth; writes queries.txt and aws_queries.txt.
./queries_generator
# Prompts for bucket and file name; reads aws_queries.txt, writes aws_cmds.sh.
./generate_aws_cmds

# aws_cmds.sh is created as a plain file, so it must be made executable.
chmod +x aws_cmds.sh

# Runs one aws command per query; results go to aws_results/output<N>.csv.
./aws_cmds.sh
Loading

0 comments on commit ad0a46d

Please sign in to comment.