Skip to content

Commit

Permalink
Merge branch 'sql/matching-stats-perf' into staging/matching-stats-perf
Browse files Browse the repository at this point in the history
# Conflicts:
#	backend/src/main/java/com/bakdata/conquery/mode/local/UpdateMatchingStatsSqlJob.java
  • Loading branch information
awildturtok committed Dec 10, 2024
2 parents ef78ded + eb23bd2 commit 0fe2001
Showing 1 changed file with 23 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.time.StopWatch;
import org.jooq.Condition;
Expand All @@ -61,12 +62,18 @@ public class UpdateMatchingStatsSqlJob extends Job {
private static final Name ENTITIES = name("entities");
private static final Name DATES = name("dates");

@ToString.Exclude
private final DatabaseConfig databaseConfig;
@ToString.Exclude
private final SqlExecutionService executionService;
@ToString.Exclude
private final DSLContext dslContext;
@ToString.Exclude
private final SqlFunctionProvider functionProvider;
private final Set<ConceptId> concepts;
@ToString.Exclude
private final ListeningExecutorService executors;
@ToString.Exclude
private ListenableFuture<?> all;

public UpdateMatchingStatsSqlJob(
Expand Down Expand Up @@ -100,11 +107,6 @@ private static void addEntryToConceptElement(final ConceptTreeNode<?> mostSpecif
mostSpecificChild.getMatchingStats().putEntry(columnKey, entry);
}

/**
 * Returns the label of this job: the fixed text "Calculating Matching Stats for %s."
 * with the placeholder filled in from {@code executionService}.
 */
@Override
public String getLabel() {
return "Calculating Matching Stats for %s.".formatted(executionService);
}

@Override
public void execute() throws Exception {

Expand Down Expand Up @@ -144,7 +146,12 @@ public void cancel() {
super.cancel();
}

public void calculateMatchingStats(final TreeConcept treeConcept) {
/**
 * Returns the label of this job: the fixed text "Calculating Matching Stats for %s."
 * with the placeholder filled in from {@code executionService}.
 */
@Override
public String getLabel() {
return "Calculating Matching Stats for %s.".formatted(executionService);
}

private void calculateMatchingStats(final TreeConcept treeConcept) {

log.info("BEGIN fetching results for {}", treeConcept.getId());

Expand All @@ -164,8 +171,7 @@ public void calculateMatchingStats(final TreeConcept treeConcept) {
.map(field -> field(field.getUnqualifiedName()))
.collect(Collectors.toList());

// if there is no validity date at all, we select no field

// if there is no validity date at all, no field is selected
final Field<?> validityDateExpression = toValidityDateExpression(validityDateMap);

final SelectJoinStep<Record> query = dslContext.select(relevantColumnsAliased)
Expand Down Expand Up @@ -311,15 +317,14 @@ private Field<String> toValidityDateExpression(final Map<Connector, List<ColumnD
.map(functionProvider::toDualColumn)
.toList();

// Need to use distinct as some ValidityDates overlap when using first/last day but also daterange
final List<Field<Date>> allStarts = validityDates.stream().map(ColumnDateRange::getStart).distinct().toList();
final List<Field<Date>> allEnds = validityDates.stream().map(ColumnDateRange::getEnd).distinct().toList();

// HANA does not like least/greatest when given only a single argument
final Field<Date> startField = allStarts.size() > 1 ? functionProvider.least(allStarts) : allStarts.get(0);
final Field<Date> endField = allEnds.size() > 1 ? functionProvider.greatest(allEnds) : allEnds.get(0);

final ColumnDateRange minAndMax = ColumnDateRange.of(min(startField), max(endField));

final ColumnDateRange minAndMax = ColumnDateRange.of(
min(allStarts.size() > 1 ? functionProvider.least(allStarts) : allStarts.get(0)),
max(allEnds.size() > 1 ? functionProvider.greatest(allEnds) : allEnds.get(0))
);
return functionProvider.daterangeStringExpression(minAndMax);
}

Expand All @@ -335,6 +340,10 @@ private void mapRecordToConceptElements(final TreeConcept treeConcept, final Rec
try {
final String columnValue = record.get(CONNECTOR_COLUMN, String.class);

if (columnValue == null) {
return;
}

final ConceptTreeChild mostSpecificChild = treeCache.findMostSpecificChild(columnValue, rowMap);

// database value did not match any node of the concept
Expand Down

0 comments on commit 0fe2001

Please sign in to comment.