Skip to content

Commit

Permalink
Add support for custom date format and openSearch date format for dat…
Browse files Browse the repository at this point in the history
…e fields as part of Lucene query (#2762)

Github Issue - #2700

Signed-off-by: Manasvini B S <[email protected]>
(cherry picked from commit 0fad56d)
Signed-off-by: Manasvini B S <[email protected]>
  • Loading branch information
manasvinibs committed Jul 22, 2024
1 parent f7c1f09 commit 363d8df
Show file tree
Hide file tree
Showing 18 changed files with 793 additions and 212 deletions.
42 changes: 42 additions & 0 deletions docs/user/general/datatypes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,48 @@ Querying such index will provide a response with ``schema`` block as shown below
"status": 200
}
If a SQL query compares an `IndexDateField` with a literal value (for example in a term query or a range query), the literal value can be written in the format configured for that `IndexDateField`.

.. code-block:: json
{
"mappings" : {
"properties" : {
"release_date" : {
"type" : "date",
"format": "dd-MMM-yy"
}
}
}
}
Querying such an `IndexDateField` (``release_date``) will provide a response with ``schema`` and ``datarows`` blocks as shown below.

.. code-block:: json
{
"query" : "SELECT release_date FROM test_index WHERE release_date = \"03-Jan-21\""
}
.. code-block:: json
{
"schema": [
{
"name": "release_date",
"type": "date"
}
],
"datarows": [
[
"2021-01-03"
]
],
"total": 1,
"size": 1,
"status": 200
}
String Data Types
=================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,19 +62,23 @@ public String toString() {
@EqualsAndHashCode.Exclude @Getter protected MappingType mappingType;

// resolved ExprCoreType
protected ExprCoreType exprCoreType;
@Getter protected ExprCoreType exprCoreType;

/**
* Get a simplified type {@link ExprCoreType} if possible. To avoid returning `UNKNOWN` for
* `OpenSearch*Type`s, e.g. for IP, returns itself.
* `OpenSearch*Type`s, e.g. for IP, returns itself. If the `exprCoreType` is {@link
* ExprCoreType#DATE}, {@link ExprCoreType#TIMESTAMP}, {@link ExprCoreType#TIME}, or {@link
* ExprCoreType#UNKNOWN}, it returns the current instance; otherwise, it returns `exprCoreType`.
*
* @return An {@link ExprType}.
*/
public ExprType getExprType() {
if (exprCoreType != ExprCoreType.UNKNOWN) {
return exprCoreType;
}
return this;
return (exprCoreType == ExprCoreType.DATE
|| exprCoreType == ExprCoreType.TIMESTAMP
|| exprCoreType == ExprCoreType.TIME
|| exprCoreType == ExprCoreType.UNKNOWN)
? this
: exprCoreType;
}

/**
Expand Down Expand Up @@ -230,6 +234,9 @@ public String legacyTypeName() {
if (mappingType == null) {
return exprCoreType.typeName();
}
if (mappingType.toString().equalsIgnoreCase("DATE")) {
return exprCoreType.typeName();
}
return mappingType.toString().toUpperCase();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.temporal.TemporalAccessor;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.EqualsAndHashCode;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.time.DateFormatters;
import org.opensearch.common.time.FormatNames;
import org.opensearch.sql.data.type.ExprCoreType;
import org.opensearch.sql.data.type.ExprType;
Expand Down Expand Up @@ -137,6 +142,11 @@ public class OpenSearchDateType extends OpenSearchDataType {

private static final String CUSTOM_FORMAT_DATE_SYMBOLS = "FecEWwYqQgdMLDyuG";

// Fallback formatters: tried in this order by getParsedDateTime when no named or custom
// formatter is available; the first entry is also what getFormattedDate uses when no format
// is configured.
private static final List<DateFormatter> OPENSEARCH_DEFAULT_FORMATTERS =
    List.of(
        DateFormatter.forPattern("strict_date_time_no_millis"),
        DateFormatter.forPattern("strict_date_optional_time"),
        DateFormatter.forPattern("epoch_millis"));

@EqualsAndHashCode.Exclude private final List<String> formats;

private OpenSearchDateType() {
Expand Down Expand Up @@ -235,6 +245,59 @@ public List<DateFormatter> getAllCustomFormatters() {
.collect(Collectors.toList());
}

/**
 * Parses a date/time string with the formatters available for this date type.
 *
 * <p>Named formatters are tried first, followed by custom formatters. When neither list
 * contains a formatter, {@code OPENSEARCH_DEFAULT_FORMATTERS} is used instead. The first
 * formatter that accepts the input determines the result.
 *
 * @param dateTime The date/time string to parse.
 * @return The parsed value as a {@link ZonedDateTime} with its zone set to UTC (the parsed
 *     local date-time is kept as is), or {@code null} if no formatter can parse the input.
 */
public ZonedDateTime getParsedDateTime(String dateTime) {
  List<DateFormatter> configured =
      Stream.of(this.getAllNamedFormatters(), this.getAllCustomFormatters())
          .flatMap(List::stream)
          .collect(Collectors.toList());

  // fall back to the default formatters when nothing is configured
  List<DateFormatter> candidates =
      configured.isEmpty() ? OPENSEARCH_DEFAULT_FORMATTERS : configured;

  for (DateFormatter candidate : candidates) {
    try {
      return DateFormatters.from(candidate.parse(dateTime)).withZoneSameLocal(ZoneOffset.UTC);
    } catch (IllegalArgumentException ignored) {
      // this formatter cannot handle the input, move on to the next one
    }
  }
  return null;
}

/**
 * Formats the given temporal value as a date string.
 *
 * <p>If no format is configured (see {@link #hasNoFormatter()}), the first entry of {@code
 * OPENSEARCH_DEFAULT_FORMATTERS} is used. Otherwise a formatter is built from the first
 * configured format string.
 *
 * @param accessor The TemporalAccessor object containing the date/time information.
 * @return The formatted date string.
 */
public String getFormattedDate(TemporalAccessor accessor) {
  final DateFormatter chosen =
      hasNoFormatter()
          ? OPENSEARCH_DEFAULT_FORMATTERS.get(0)
          : DateFormatter.forPattern(this.formats.get(0));
  return chosen.format(accessor);
}

/**
 * Tells whether this date type has no format strings configured.
 *
 * @return {@code true} if the list of formats is empty, {@code false} otherwise.
 */
public boolean hasNoFormatter() {
  return formats.isEmpty();
}

/**
* Retrieves a list of named formatters that format for dates.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ private Optional<ExprType> type(String field) {
private static ExprValue parseDateTimeString(String value, OpenSearchDateType dataType) {
List<DateFormatter> formatters = dataType.getAllNamedFormatters();
formatters.addAll(dataType.getAllCustomFormatters());
ExprCoreType returnFormat = (ExprCoreType) dataType.getExprType();
ExprCoreType returnFormat = dataType.getExprCoreType();

for (DateFormatter formatter : formatters) {
try {
Expand Down Expand Up @@ -268,8 +268,7 @@ private static ExprValue parseDateTimeString(String value, OpenSearchDateType da

private static ExprValue createOpenSearchDateType(Content value, ExprType type) {
OpenSearchDateType dt = (OpenSearchDateType) type;
ExprType returnFormat = dt.getExprType();

ExprCoreType returnFormat = dt.getExprCoreType();
if (value.isNumber()) { // isNumber
var numFormatters = dt.getNumericNamedFormatters();
if (numFormatters.size() > 0 || !dt.hasFormats()) {
Expand All @@ -282,7 +281,7 @@ private static ExprValue createOpenSearchDateType(Content value, ExprType type)
epochMillis = value.longValue();
}
Instant instant = Instant.ofEpochMilli(epochMillis);
switch ((ExprCoreType) returnFormat) {
switch (returnFormat) {
case TIME:
return new ExprTimeValue(LocalTime.from(instant.atZone(UTC_ZONE_ID)));
case DATE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package org.opensearch.sql.opensearch.storage.script.aggregation.dsl;

import static org.opensearch.sql.data.type.ExprCoreType.DATE;
import static org.opensearch.sql.data.type.ExprCoreType.DATETIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIME;
import static org.opensearch.sql.data.type.ExprCoreType.TIMESTAMP;

Expand All @@ -24,6 +23,7 @@
import org.opensearch.sql.ast.expression.SpanUnit;
import org.opensearch.sql.expression.NamedExpression;
import org.opensearch.sql.expression.span.SpanExpression;
import org.opensearch.sql.opensearch.data.type.OpenSearchDateType;
import org.opensearch.sql.opensearch.storage.serialization.ExpressionSerializer;

/** Bucket Aggregation Builder. */
Expand Down Expand Up @@ -66,7 +66,10 @@ private CompositeValuesSourceBuilder<?> buildCompositeValuesSourceBuilder(
.missingOrder(missingOrder)
.order(sortOrder);
// Time types values are converted to LONG in ExpressionAggregationScript::execute
if (List.of(TIMESTAMP, TIME, DATE, DATETIME).contains(expr.getDelegated().type())) {
if ((expr.getDelegated().type() instanceof OpenSearchDateType
&& List.of(TIMESTAMP, TIME, DATE)
.contains(((OpenSearchDateType) expr.getDelegated().type()).getExprCoreType()))
|| List.of(TIMESTAMP, TIME, DATE).contains(expr.getDelegated().type())) {
sourceBuilder.userValuetypeHint(ValueType.LONG);
}
return helper.build(expr.getDelegated(), sourceBuilder::field, sourceBuilder::script);
Expand Down
Loading

0 comments on commit 363d8df

Please sign in to comment.