From bcdd066379c69c4d6e22b926cc56363500d170c9 Mon Sep 17 00:00:00 2001 From: Roman Fursov Date: Thu, 8 Dec 2022 19:41:59 +0100 Subject: [PATCH] Initial set up with the hook backbone implementation (#1) * Initial set up with the hook backbone implementation --- BUILD | 6 + README.md | 17 +++ WORKSPACE | 101 ++++++++++++++ dist/BUILD | 23 ++++ docs/code-of-conduct.md | 93 +++++++++++++ docs/contributing.md | 33 +++++ src/BUILD | 32 +++++ .../cloud/bigquery/dwhassessment/hooks/BUILD | 33 +++++ .../dwhassessment/hooks/ExecutionMode.java | 28 ++++ .../hooks/MigrationAssessmentLoggingHook.java | 127 ++++++++++++++++++ .../bigquery/dwhassessment/hooks/README.md | 5 + .../hooks/avro/AvroSchemaLoader.java | 38 ++++++ .../bigquery/dwhassessment/hooks/avro/BUILD | 28 ++++ .../dwhassessment/hooks/avro/QueryEvents.avsc | 97 +++++++++++++ .../cloud/bigquery/dwhassessment/hooks/BUILD | 42 ++++++ .../MigrationAssessmentLoggingHookTest.java | 107 +++++++++++++++ 16 files changed, 810 insertions(+) create mode 100644 BUILD create mode 100644 WORKSPACE create mode 100644 dist/BUILD create mode 100644 docs/code-of-conduct.md create mode 100644 docs/contributing.md create mode 100644 src/BUILD create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/BUILD create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/ExecutionMode.java create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHook.java create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/README.md create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/AvroSchemaLoader.java create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/BUILD create mode 100644 src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/QueryEvents.avsc create mode 100644 src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/BUILD create mode 100644 src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHookTest.java diff --git a/BUILD b/BUILD new file mode 100644 index 0000000..4c4cd32 --- /dev/null +++ b/BUILD @@ -0,0 +1,6 @@ +package(default_visibility = ["//src:internal"]) + +exports_files([ + "LICENSE", + "README.md", +]) diff --git a/README.md b/README.md index 3146e4b..3a972e7 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,20 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + +## Building + +Install bazel: + +``` +sudo apt-get update +sudo apt-get install bazel +``` + +Build the dist zip files: + +``` +bazel build //dist:all +``` + +Zip files will be available under `bazel-bin/dist` directory. diff --git a/WORKSPACE b/WORKSPACE new file mode 100644 index 0000000..4efc33c --- /dev/null +++ b/WORKSPACE @@ -0,0 +1,101 @@ +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +RULES_JVM_EXTERNAL_TAG = "4.0" + +RULES_JVM_EXTERNAL_SHA = "31701ad93dbfe544d597dbe62c9a1fdd76d81d8a9150c2bf1ecf928ecdf97169" + +http_archive( + name = "rules_jvm_external", + sha256 = RULES_JVM_EXTERNAL_SHA, + strip_prefix = "rules_jvm_external-%s" % RULES_JVM_EXTERNAL_TAG, + url = "https://github.com/bazelbuild/rules_jvm_external/archive/%s.zip" % RULES_JVM_EXTERNAL_TAG, +) + +load("@rules_jvm_external//:defs.bzl", "maven_install") +load("@rules_jvm_external//:specs.bzl", "maven") + +# All artifacts here should have "neverlink = True" unless it's really required to distribute them +maven_install( + artifacts = [ + maven.artifact( + "com.google.guava", + "guava", + "31.0-jre", + neverlink = True, + ), + maven.artifact( + "org.apache.avro", + "avro", + "1.10.2", + neverlink = True, + ), + maven.artifact( + "org.apache.curator", + "apache-curator", + "2.7.1", + neverlink = True, + ), + maven.artifact( + "org.apache.hive", + "hive-exec", + "2.2.0", + neverlink = True, + ), + maven.artifact( + "org.slf4j", + "slf4j-api", + "1.7.10", + neverlink = True, + ), + ], + excluded_artifacts = [ + "org.pentaho:pentaho-aggdesigner-algorithm", + ], + fetch_sources = True, + repositories = [ + "https://maven.google.com", + "https://repo1.maven.org/maven2", + ], + version_conflict_policy = "pinned", +) + +# Tests need dependencies in the runtime. Since there is no way to mark the dependency as both +# "neverlink" and "testonly", create a separate declaration specifically for tests. +# Code in java/ folder must not depend on this declaration. +maven_install( + name = "maven_tests", + artifacts = [ + "org.apache.hadoop:hadoop-common:2.9.0", + "org.apache.hadoop:hadoop-mapreduce-client-common:2.9.0", + "org.apache.hadoop:hadoop-mapreduce-client-core:2.9.0", + "org.apache.hive:hive-exec:2.2.0", + "com.google.truth:truth:1.1.3", + "org.apache.curator:apache-curator:2.7.1", + "junit:junit:4.13.2", + "org.mockito:mockito-core:3.11.1", + "org.slf4j:slf4j-api:1.7.10", + ], + excluded_artifacts = [ + "org.pentaho:pentaho-aggdesigner-algorithm", + ], + fetch_sources = True, + repositories = [ + "https://maven.google.com", + "https://repo1.maven.org/maven2", + ], +) + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +http_archive( + name = "rules_pkg", + sha256 = "8a298e832762eda1830597d64fe7db58178aa84cd5926d76d5b744d6558941c2", + urls = [ + "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz", + "https://github.com/bazelbuild/rules_pkg/releases/download/0.7.0/rules_pkg-0.7.0.tar.gz", + ], +) + +load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies") + +rules_pkg_dependencies() diff --git a/dist/BUILD b/dist/BUILD new file mode 100644 index 0000000..8cf6cc0 --- /dev/null +++ b/dist/BUILD @@ -0,0 +1,23 @@ +load("@rules_pkg//:mappings.bzl", "pkg_attributes", "pkg_filegroup", "pkg_files", "strip_prefix") +load("@rules_pkg//:pkg.bzl", "pkg_zip") + +pkg_files( + name = "hooks_files", + srcs = [ + "//:LICENSE", + "//:README.md", + "//src/java/com/google/cloud/bigquery/dwhassessment/hooks:HiveMigrationAssessmentQueryLogsHooks_deploy.jar", + ], + attributes = pkg_attributes( + mode = "0555", + ), + prefix = "", +) + +pkg_zip( + name = "hadoop-migration-assessment-hooks", + srcs = [ + ":hooks_files", + ], + package_dir = "hadoop-migration-assessment-hooks", +) diff --git a/docs/code-of-conduct.md b/docs/code-of-conduct.md new file mode 100644 index 0000000..dc079b4 --- /dev/null +++ b/docs/code-of-conduct.md @@ -0,0 +1,93 @@ +# Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +This Code of Conduct also applies outside the project spaces when the Project +Steward has a reasonable belief that an individual's behavior may have a +negative impact on the project or its community. + +## Conflict Resolution + +We do not believe that all conflict is bad; healthy debate and disagreement +often yield positive results. However, it is never okay to be disrespectful or +to engage in behavior that violates the project’s code of conduct. + +If you see someone violating the code of conduct, you are encouraged to address +the behavior directly with those involved. Many issues can be resolved quickly +and easily, and this gives people more control over the outcome of their +dispute. If you are unable to resolve the matter for any reason, or if the +behavior is threatening or harassing, report it. We are dedicated to providing +an environment where participants feel welcome and safe. + +Reports should be directed to *[PROJECT STEWARD NAME(s) AND EMAIL(s)]*, the +Project Steward(s) for *[PROJECT NAME]*. It is the Project Steward’s duty to +receive and address reported violations of the code of conduct. They will then +work with a committee consisting of representatives from the Open Source +Programs Office and the Google Open Source Strategy team. If for any reason you +are uncomfortable reaching out to the Project Steward, please email +opensource@google.com. + +We will investigate every complaint, but you may not receive a direct response. +We will use our discretion in determining when and how to follow up on reported +incidents, which may range from not taking action to permanent expulsion from +the project and project-sponsored spaces. We will notify the accused of the +report and provide them an opportunity to discuss it before any action is taken. +The identity of the reporter will be omitted from the details of the report +supplied to the accused. In potentially harmful situations, such as ongoing +harassment or threats to anyone's safety, we may take action without notice. + +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..87917f8 --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,33 @@ +# How to Contribute + +We'd love to accept your patches and contributions to this project. + +## Before you begin + +### Sign our Contributor License Agreement + +Contributions to this project must be accompanied by a +[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). +You (or your employer) retain the copyright to your contribution; this simply +gives us permission to use and redistribute your contributions as part of the +project. + +If you or your current employer have already signed the Google CLA (even if it +was for a different project), you probably don't need to do it again. + +Visit to see your current agreements or to +sign a new one. + +### Review our Community Guidelines + +This project follows [Google's Open Source Community +Guidelines](https://opensource.google/conduct/). + +## Contribution process + +### Code Reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff --git a/src/BUILD b/src/BUILD new file mode 100644 index 0000000..ccdefca --- /dev/null +++ b/src/BUILD @@ -0,0 +1,32 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +load("@rules_java//java:defs.bzl", "java_library") + +package_group( + name = "internal", + packages = [ + "//", + "//dist/...", + "//src/...", + ], +) + +package_group( + name = "tests", + packages = [ + "//src/javatests/...", + ], +) + +package(default_visibility = [":internal"]) diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/BUILD b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/BUILD new file mode 100644 index 0000000..f03bb19 --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/BUILD @@ -0,0 +1,33 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +load("@rules_java//java:defs.bzl", "java_binary", "java_library", "java_test") + +package(default_visibility = ["//src:internal"]) + +java_library( + name = "hooks", + srcs = glob(["*.java"]), + deps = [ + "//src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro", + "@maven//:org_apache_hive_hive_exec_2_2_0", + "@maven//:org_slf4j_slf4j_api", + ], +) + +java_binary( + name = "HiveMigrationAssessmentQueryLogsHooks", + runtime_deps = [ + ":hooks", + ], +) diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/ExecutionMode.java b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/ExecutionMode.java new file mode 100644 index 0000000..945ea17 --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/ExecutionMode.java @@ -0,0 +1,28 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigquery.dwhassessment.hooks; + +/** + * Query execution mode. + * + *

Equal to Apache Hive HiveProtoLoggingHook.ExecutionMode enum + */ +public enum ExecutionMode { + MR, + TEZ, + LLAP, + NONE +} diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHook.java b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHook.java new file mode 100644 index 0000000..77b58d9 --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHook.java @@ -0,0 +1,127 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigquery.dwhassessment.hooks; + +import static org.apache.hadoop.hive.ql.hooks.Entity.Type.PARTITION; +import static org.apache.hadoop.hive.ql.hooks.Entity.Type.TABLE; + +import com.google.cloud.bigquery.dwhassessment.hooks.avro.AvroSchemaLoader; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; +import org.apache.hadoop.hive.ql.exec.tez.TezTask; +import org.apache.hadoop.hive.ql.hooks.Entity; +import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext; +import org.apache.hadoop.hive.ql.hooks.HookContext; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Hook, which captures query events and stores them as {@link GenericRecord}. */ +public class MigrationAssessmentLoggingHook implements ExecuteWithHookContext { + + private static final Logger LOG = LoggerFactory.getLogger(MigrationAssessmentLoggingHook.class); + + private static final Schema QUERY_EVENT_SCHEMA = AvroSchemaLoader.loadSchema("QueryEvents.avsc"); + + // Just for the example. Real hook will write it to the file + public List records = new ArrayList<>(); + + public void run(HookContext hookContext) throws Exception { + processQueryEventAvro(hookContext); + } + + private void processQueryEventAvro(HookContext hookContext) { + QueryPlan plan = hookContext.getQueryPlan(); + + LOG.info("Received hook notification for: {}", plan.getQueryId()); + + // Make a copy so that we do not modify hookContext conf. + HiveConf conf = new HiveConf(hookContext.getConf()); + List mrTasks = Utilities.getMRTasks(plan.getRootTasks()); + List tezTasks = Utilities.getTezTasks(plan.getRootTasks()); + ExecutionMode executionMode = getExecutionMode(mrTasks, tezTasks); + + GenericRecord queryEvent = + new GenericRecordBuilder(QUERY_EVENT_SCHEMA) + .set("QueryId", plan.getQueryId()) + .set("QueryText", plan.getQueryStr()) + .set("EventType", hookContext.getHookType().name()) + .set("Timestamp", plan.getQueryStartTime()) + .set("User", getUser(hookContext)) + .set("RequestUser", getRequestUser(hookContext)) + .set("ExecutionMode", executionMode.name()) + .set("Queue", getQueueName(executionMode, conf)) + .set("TablesRead", getTablesFromEntitySet(plan.getInputs())) + .set("TablesWritten", getTablesFromEntitySet(plan.getOutputs())) + .build(); + records.add(queryEvent); + LOG.info("Processed record: {}", queryEvent); + } + + private List getTablesFromEntitySet(Set entities) { + List tableNames = new ArrayList<>(); + for (Entity entity : entities) { + if (entity.getType() == TABLE || entity.getType() == PARTITION) { + tableNames.add(entity.getTable().getCompleteName()); + } + } + return tableNames; + } + + private String getUser(HookContext hookContext) { + return hookContext.getUgi().getShortUserName(); + } + + private String getRequestUser(HookContext hookContext) { + String requestUser = hookContext.getUserName(); + return requestUser == null ? hookContext.getUgi().getUserName() : requestUser; + } + + private ExecutionMode getExecutionMode(List mrTasks, List tezTasks) { + if (tezTasks.size() > 0) { + // Need to go in and check if any of the tasks is running in LLAP mode. + for (TezTask tezTask : tezTasks) { + if (tezTask.getWork().getLlapMode()) { + return ExecutionMode.LLAP; + } + } + return ExecutionMode.TEZ; + } + + return mrTasks.size() > 0 ? ExecutionMode.MR : ExecutionMode.NONE; + } + + private String getQueueName(ExecutionMode mode, HiveConf conf) { + switch (mode) { + case LLAP: + return conf.get(HiveConf.ConfVars.LLAP_DAEMON_QUEUE_NAME.varname); + case MR: + return "TODO_MAPREDUCE"; + case TEZ: + return "TODO_TEZ"; + case NONE: + default: + return null; + } + } +} diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/README.md b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/README.md new file mode 100644 index 0000000..ce6b07f --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/README.md @@ -0,0 +1,5 @@ +# Migration Assessment Hive Logging Hook + +This tool allows extracting query logs submitted to Hive server. + +Query logs information is required for the assessment for migration. diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/AvroSchemaLoader.java b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/AvroSchemaLoader.java new file mode 100644 index 0000000..79c50b5 --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/AvroSchemaLoader.java @@ -0,0 +1,38 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigquery.dwhassessment.hooks.avro; + +import static com.google.common.base.Preconditions.checkArgument; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import org.apache.avro.Schema; + +/** Loader for Avro schemas */ +public final class AvroSchemaLoader { + /** Loads Avro schema from files in path relative to this class */ + public static Schema loadSchema(String name) { + URL scriptUrl = AvroSchemaLoader.class.getResource(name); + String errorMessage = String.format("Resource '%s' does not exist.", name); + checkArgument(scriptUrl != null, errorMessage); + try (InputStream inputStream = scriptUrl.openStream()) { + return new Schema.Parser().parse(inputStream); + } catch (IOException e) { + throw new IllegalStateException(String.format("Error reading schema '%s'.", name), e); + } + } +} diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/BUILD b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/BUILD new file mode 100644 index 0000000..9099a0d --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/BUILD @@ -0,0 +1,28 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +load("@rules_java//java:defs.bzl", "java_library") + +package(default_visibility = ["//src:internal"]) + +java_library( + name = "avro", + srcs = glob(["*.java"]), + resources = glob([ + "*.avsc", + ]), + deps = [ + "@maven//:com_google_guava_guava", + "@maven//:org_apache_avro_avro", + ], +) diff --git a/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/QueryEvents.avsc b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/QueryEvents.avsc new file mode 100644 index 0000000..9c958ed --- /dev/null +++ b/src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro/QueryEvents.avsc @@ -0,0 +1,97 @@ +{ + "type": "record", + "name": "QueryEvents", + "fields": [ + { + "name": "QueryId", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "QueryText", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "EventType", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "ExecutionMode", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "RequestUser", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "User", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "Queue", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "OperationId", + "type": [ + "null", + "string" + ], + "default": null + }, + { + "name": "Timestamp", + "type": [ + "null", + { + "type": "long", + "logicalType": "timestamp-millis" + } + ], + "default": null + }, + { + "name": "TablesRead", + "type": { + "type": "array", + "items": "string" + }, + "default": [] + }, + { + "name": "TablesWritten", + "type": { + "type": "array", + "items": "string" + }, + "default": [] + } + ] +} \ No newline at end of file diff --git a/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/BUILD b/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/BUILD new file mode 100644 index 0000000..f83d7ae --- /dev/null +++ b/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/BUILD @@ -0,0 +1,42 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +load("@rules_java//java:defs.bzl", "java_library", "java_test") + +java_library( + name = "tests", + testonly = 1, + srcs = glob(["*.java"]), + deps = [ + "//src/java/com/google/cloud/bigquery/dwhassessment/hooks", + "//src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro", + "@maven_tests//:com_google_truth_truth", + "@maven_tests//:junit_junit", + "@maven_tests//:org_apache_hadoop_hadoop_common", + "@maven_tests//:org_apache_hadoop_hadoop_mapreduce_client_common", + "@maven_tests//:org_apache_hadoop_hadoop_yarn_common", + "@maven_tests//:org_apache_hive_hive_common", + "@maven_tests//:org_apache_hive_hive_exec", + "@maven_tests//:org_mockito_mockito_core", + "@maven_tests//:org_slf4j_slf4j_api", + ], +) + +java_test( + name = "MigrationAssessmentLoggingHookTest", + size = "small", + test_class = "com.google.cloud.bigquery.dwhassessment.hooks.MigrationAssessmentLoggingHookTest", + runtime_deps = [ + ":tests", + ], +) diff --git a/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHookTest.java b/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHookTest.java new file mode 100644 index 0000000..1f8a5d6 --- /dev/null +++ b/src/javatests/com/google/cloud/bigquery/dwhassessment/hooks/MigrationAssessmentLoggingHookTest.java @@ -0,0 +1,107 @@ +/* + * Copyright 2022 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.google.cloud.bigquery.dwhassessment.hooks; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.cloud.bigquery.dwhassessment.hooks.avro.AvroSchemaLoader; +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericRecordBuilder; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.QueryPlan; +import org.apache.hadoop.hive.ql.QueryState; +import org.apache.hadoop.hive.ql.hooks.HookContext; +import org.apache.hadoop.hive.ql.hooks.HookContext.HookType; +import org.apache.hadoop.hive.ql.log.PerfLogger; +import org.apache.hadoop.hive.ql.metadata.Hive; +import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer; +import org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer; +import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnit; +import org.mockito.junit.MockitoRule; + +@RunWith(JUnit4.class) +public class MigrationAssessmentLoggingHookTest { + + @Rule public MockitoRule mocks = MockitoJUnit.rule(); + + @Mock PerfLogger perfLoggerMock; + @Mock Hive hiveMock; + + private static final Schema QUERY_EVENTS_SCHEMA = AvroSchemaLoader.loadSchema("QueryEvents.avsc"); + + private QueryState queryState; + + private MigrationAssessmentLoggingHook hook; + + @Before + public void setup() { + hook = new MigrationAssessmentLoggingHook(); + queryState = new QueryState(new HiveConf()); + } + + @Test + public void call_success() throws Exception { + String queryText = "SELECT * FROM employees"; + String queryId = "hive_query_id_999"; + QueryPlan queryPlan = createQueryPlan(queryText, queryId); + HookContext context = createContext(queryPlan); + context.setHookType(HookType.PRE_EXEC_HOOK); + + // Act + hook.run(context); + + // Assert + assertThat(hook.records) + .containsExactly( + new GenericRecordBuilder(QUERY_EVENTS_SCHEMA) + .set("QueryId", queryId) + .set("QueryText", queryText) + .set("EventType", "PRE_EXEC_HOOK") + .set("ExecutionMode", "NONE") + .set("Timestamp", 1234L) + .set("RequestUser", "test_user") + .set("User", System.getProperty("user.name")) + .build()); + } + + private HookContext createContext(QueryPlan queryPlan) throws Exception { + return new HookContext( + queryPlan, + queryState, + null, + "test_user", + "192.168.10.10", + "hive_addr", + "test_op_id", + "test_session_id", + "test_thread_id", + true, + perfLoggerMock); + } + + private QueryPlan createQueryPlan(String queryText, String queryId) throws Exception { + BaseSemanticAnalyzer sem = new DDLSemanticAnalyzer(queryState, hiveMock); + + return new QueryPlan(queryText, sem, 1234L, queryId, HiveOperation.QUERY, null); + } +}