Add EventRecordConstructor responsible for creating GenericRecords based on the hook type (#2)

* Add EventRecordConstructor, responsible for creating GenericRecords based on the hook type.
* Replace ZoneId with ZoneOffset.
* Add DatePartitionedLogger and RecordsWriter (#3). DatePartitionedLogger is the part of the Avro logging hook that manages the filesystem paths and creates RecordsWriter instances. RecordsWriter is an Avro datum writer that works with HDFS.
* Add EventLogger (#4). EventLogger is the core handler of the logging hook. It is almost a full copy of the EventLogger in HiveProtoLoggingHook, with minimal adjustments.
* Integrate EventLogger into the hook (#5). This finishes the backbone implementation of the logging hook. The hook writes Avro messages to a file in a date-partitioned folder on GCS (verified on Dataproc; it should behave the same with HDFS, but that still needs verification). The rollover implementation is not yet tested; it was taken as is from the HiveProtoLoggingHook implementation. Verified on Dataproc with Hive v2.3.6 and v3.
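For context, Hive invokes hooks of this kind through its standard hook mechanism (hive.exec.pre.hooks, hive.exec.post.hooks, hive.exec.failure.hooks). The sketch below is a minimal illustration of that wiring only, not this commit's code: the hook class name and the EventLogger calls referenced in comments are assumptions.

package com.google.cloud.bigquery.dwhassessment.hooks;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

/** Minimal sketch of a Hive hook entry point; the names here are illustrative assumptions. */
public class LoggingHookSketch implements ExecuteWithHookContext {

  @Override
  public void run(HookContext hookContext) throws Exception {
    // Hive calls the same class for pre-, post- and failure events when it is registered
    // under hive.exec.pre.hooks, hive.exec.post.hooks and hive.exec.failure.hooks.
    HiveConf conf = hookContext.getConf();

    // Hypothetical delegation: the commit hands the HookContext to its EventLogger, which
    // builds GenericRecords and appends them through the date-partitioned Avro writer.
    // EventLogger.getInstance(conf).handle(hookContext);
  }
}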
Showing 22 changed files with 1,589 additions and 110 deletions.
30 changes: 30 additions & 0 deletions
src/java/com/google/cloud/bigquery/dwhassessment/hooks/logger/BUILD
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
load("@rules_java//java:defs.bzl", "java_library")

package(default_visibility = ["//src:internal"])

java_library(
    name = "logger",
    srcs = glob(["*.java"]),
    deps = [
        "//src/java/com/google/cloud/bigquery/dwhassessment/hooks/avro",
        "@maven//:com_google_guava_guava",
        "@maven//:org_apache_avro_avro",
        "@maven//:org_apache_commons_commons_compress",
        "@maven//:org_apache_hadoop_hadoop_common",
        "@maven//:org_apache_hive_hive_exec_2_2_0",
        "@maven//:org_slf4j_slf4j_api",
    ],
)
93 changes: 93 additions & 0 deletions
...google/cloud/bigquery/dwhassessment/hooks/logger/DatePartitionedRecordsWriterFactory.java
/*
 * Copyright 2022 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.bigquery.dwhassessment.hooks.logger;

import java.io.IOException;
import java.time.Clock;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Factory for {@link RecordsWriter} instances. Manages them to write to files, partitioned by
 * dates.
 */
public class DatePartitionedRecordsWriterFactory {
  private static final Logger LOG =
      LoggerFactory.getLogger(DatePartitionedRecordsWriterFactory.class);
  private static final FsPermission DIR_PERMISSION = FsPermission.createImmutable((short) 1023);
  private final Path basePath;
  private final Configuration conf;
  private final Schema schema;
  private final Clock clock;

  public DatePartitionedRecordsWriterFactory(
      Path baseDir, Configuration conf, Schema schema, Clock clock) throws IOException {
    this.conf = conf;
    this.createDirIfNotExists(baseDir);
    this.schema = schema;
    this.clock = clock;
    basePath = baseDir.getFileSystem(conf).resolvePath(baseDir);
  }

  public static LocalDate getDateFromDir(String dirName) {
    try {
      return LocalDate.parse(dirName, DateTimeFormatter.ISO_LOCAL_DATE);
    } catch (DateTimeParseException e) {
      throw new IllegalArgumentException("Invalid directory: " + dirName, e);
    }
  }

  public RecordsWriter createWriter(String fileName) throws IOException {
    Path filePath = getPathForDate(getNow(), fileName);
    return new RecordsWriter(conf, filePath, schema);
  }

  private void createDirIfNotExists(Path path) throws IOException {
    FileSystem fileSystem = path.getFileSystem(conf);

    try {
      if (!fileSystem.exists(path)) {
        fileSystem.mkdirs(path);
        fileSystem.setPermission(path, DIR_PERMISSION);
      }
    } catch (IOException e) {
      LOG.warn("Error while trying to set permission", e);
    }
  }

  private Path getPathForDate(LocalDate date, String fileName) throws IOException {
    Path path = new Path(basePath, getDirForDate(date));
    createDirIfNotExists(path);
    return new Path(path, fileName);
  }

  private String getDirForDate(LocalDate date) {
    return DateTimeFormatter.ISO_LOCAL_DATE.format(date);
  }

  public LocalDate getNow() {
    return clock.instant().atOffset(ZoneOffset.UTC).toLocalDate();
  }
}
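A short usage sketch for the factory above: the Avro schema, the output path, and the RecordsWriter calls shown in comments are assumptions; only the constructor and createWriter(String) signatures come from this file.

package com.google.cloud.bigquery.dwhassessment.hooks.logger;

import java.time.Clock;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

/** Usage sketch only; schema and base directory below are illustrative assumptions. */
public final class DatePartitionedRecordsWriterFactoryDemo {

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Placeholder schema; the real hook uses the schema from the avro package in this repository.
    Schema schema =
        SchemaBuilder.record("QueryEvent").fields().requiredString("QueryId").endRecord();

    // Files created through the factory land under <baseDir>/<yyyy-MM-dd>/<fileName>, because
    // the factory formats the current UTC date with ISO_LOCAL_DATE to pick the subdirectory.
    DatePartitionedRecordsWriterFactory factory =
        new DatePartitionedRecordsWriterFactory(
            new Path("/tmp/hive-assessment-logs"), conf, schema, Clock.systemUTC());

    RecordsWriter writer = factory.createWriter("query_events_0.avro");
    // Hypothetical: append GenericRecords and close the writer here, e.g.
    // writer.writeRecord(record); writer.close();
  }
}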