diff --git a/docs/exclusive.md b/docs/exclusive.md
index 9eeeb617..046406a7 100644
--- a/docs/exclusive.md
+++ b/docs/exclusive.md
@@ -103,7 +103,7 @@ val sparkCol = f.expr("array_sort(value, (l, r) -> case " +
 
 val doricCol = colArray[Row]("value").sortBy(CName("name"), CNameOrd("age", Desc))
 // doricCol: ArrayColumn[Row] = TransformationDoricColumn(
-//   Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@7fd4682)
+//   Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@6385b96c)
 // )
 
 dfArrayStruct.select(sparkCol.as("sorted")).show(false)
@@ -151,7 +151,7 @@ val mapColDoric = colString("value").matches[String]
   .caseW(_.length > 4, "error key".lit)
   .otherwiseNull
 // mapColDoric: DoricColumn[String] = TransformationDoricColumn(
-//   Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@bcecdd)
+//   Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@7c156ea)
 // )
 
 dfMatch.withColumn("mapResult", mapColDoric).show()
diff --git a/docs/implicits.md b/docs/implicits.md
index 4b235389..b9218140 100644
--- a/docs/implicits.md
+++ b/docs/implicits.md
@@ -55,7 +55,7 @@ instead of `colStruct("person").getChild[Int]("age")`.
 Doric embraces the _dot notation_ of common idiomatic Scala code whenever possible, instead of the functional style of Spark SQL. For instance, given the following DataFrame:
 
 ```scala
-val dfArrays = List(("string", Array(1,2,3))).toDF("str", "arr") 
+val dfArrays = List(("string", Array(1,2,3))).toDF("str", "arr")
 // dfArrays: org.apache.spark.sql.package.DataFrame = [str: string, arr: array<int>]
 ```
 
@@ -69,7 +69,7 @@ val complexS: Column =
     (x, y) => x + y)
 // complexS: Column = aggregate(transform(arr, lambdafunction((x_0 + 1), x_0)), 0, lambdafunction((x_1 + y_2), x_1, y_2), lambdafunction(x_3, x_3))
 
-dfArrays.select(complexS as "complexTransformation").show() 
+dfArrays.select(complexS as "complexTransformation").show()
 // +---------------------+
 // |complexTransformation|
 // +---------------------+
@@ -85,10 +85,10 @@ val complexCol: DoricColumn[Int] =
   .transform(_ + 1.lit)
   .aggregate(0.lit)(_ + _)
 // complexCol: DoricColumn[Int] = TransformationDoricColumn(
-//   Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@5b9b31e0)
+//   Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@2724ae48)
 // )
 
-dfArrays.select(complexCol as "complexTransformation").show() 
+dfArrays.select(complexCol as "complexTransformation").show()
 // +---------------------+
 // |complexTransformation|
 // +---------------------+
@@ -102,14 +102,14 @@ dfArrays.select(complexCol as "complexTransformation").show()
 Implicit type conversions in Spark are pervasive.
 For instance, the following code won't cause Spark to complain at all:
 ```scala
-val df0 = spark.range(1,10).withColumn("x", f.concat(f.col("id"), f.lit("jander"))) 
+val df0 = spark.range(1,10).withColumn("x", f.concat(f.col("id"), f.lit("jander")))
 // df0: org.apache.spark.sql.package.DataFrame = [id: bigint, x: string]
 ```
 
 which means that an implicit conversion from `bigint` to `string` will be in effect when we run our DataFrame:
 
 ```scala
-df0.select(f.col("x")).show() 
+df0.select(f.col("x")).show()
 // +-------+
 // |      x|
 // +-------+
@@ -144,7 +144,7 @@ Still, doric will allow you to perform that operation provided that you explicit
 ```scala
 val df1 = spark.range(1,10).toDF().withColumn("x", concat(colLong("id").cast[String], "jander".lit))
 // df1: org.apache.spark.sql.package.DataFrame = [id: bigint, x: string]
-df1.show() 
+df1.show()
 // +---+-------+
 // | id|      x|
 // +---+-------+
@@ -166,7 +166,7 @@ Let's also consider the following example:
 ```scala
 val dfEq = List((1, "1"), (1, " 1"), (1, " 1 ")).toDF("int", "str")
 // dfEq: org.apache.spark.sql.package.DataFrame = [int: int, str: string]
-dfEq.withColumn("eq", f.col("int") === f.col("str")) 
+dfEq.withColumn("eq", f.col("int") === f.col("str"))
 // res5: org.apache.spark.sql.package.DataFrame = [int: int, str: string ... 1 more field]
 ```
 
@@ -180,7 +180,7 @@ thus choosing to apply no conversion
 Let's see what happens:
 
 ```scala
-dfEq.withColumn("eq", f.col("int") === f.col("str")).show() 
+dfEq.withColumn("eq", f.col("int") === f.col("str")).show()
 // +---+---+----+
 // |int|str|  eq|
 // +---+---+----+
@@ -206,7 +206,7 @@ dfEq.withColumn("eq", colInt("int") === colString("str")).show()
 
 ```scala
 // Option 2, casting from int to string
-dfEq.withColumn("eq", colInt("int").cast[String] === colString("str")).show() 
+dfEq.withColumn("eq", colInt("int").cast[String] === colString("str")).show()
 // +---+---+-----+
 // |int|str|   eq|
 // +---+---+-----+
@@ -219,7 +219,7 @@
 
 ```scala
 // Option 3, casting from string to int, not safe!
-dfEq.withColumn("eq", colInt("int") === colString("str").unsafeCast[Int]).show() +dfEq.withColumn("eq", colInt("int") === colString("str").unsafeCast[Int]).show() // +---+---+----+ // |int|str| eq| // +---+---+----+ @@ -240,7 +240,7 @@ with an explicit import statement: ```scala import doric.implicitConversions.implicitSafeCast -dfEq.withColumn("eq", colString("str") === colInt("int") ).show() +dfEq.withColumn("eq", colString("str") === colInt("int") ).show() // +---+---+-----+ // |int|str| eq| // +---+---+-----+ @@ -261,7 +261,7 @@ val intDF = List(1,2,3).toDF("int") val colS = f.col("int") + 1 // colS: Column = (int + 1) -intDF.select(colS).show() +intDF.select(colS).show() // +---------+ // |(int + 1)| // +---------+ @@ -277,10 +277,10 @@ The default doric syntax is a little stricter and forces us to transform these v ```scala val colD = colInt("int") + 1.lit // colD: DoricColumn[Int] = TransformationDoricColumn( -// Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@44946197) +// Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@29efc601) // ) -intDF.select(colD).show() +intDF.select(colD).show() // +---------+ // |(int + 1)| // +---------+ @@ -298,14 +298,14 @@ we have to _explicitly_ add the following import statement: import doric.implicitConversions.literalConversion val colSugarD = colInt("int") + 1 // colSugarD: DoricColumn[Int] = TransformationDoricColumn( -// Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@156eb4cf) +// Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@f38abd) // ) val columConcatLiterals = concat("this", "is","doric") // concat expects DoricColumn[String] values, the conversion puts them as expected // columConcatLiterals: StringColumn = TransformationDoricColumn( -// Kleisli(scala.Function1$$Lambda$2938/0x000000010131e040@644a6ddb) +// Kleisli(scala.Function1$$Lambda$2996/0x000000080133e040@239ee8b6) // ) // concat expects DoricColumn[String] values, the conversion puts them as expected -intDF.select(colSugarD, columConcatLiterals).show() +intDF.select(colSugarD, columConcatLiterals).show() // +---------+-----------------------+ // |(int + 1)|concat(this, is, doric)| // +---------+-----------------------+ diff --git a/docs/modularity.md b/docs/modularity.md index 2c911adc..ebffb5a6 100644 --- a/docs/modularity.md +++ b/docs/modularity.md @@ -20,7 +20,7 @@ userDF.show() // | John| Paris| 30| // +---------+---------+--------+ // -userDF.printSchema() +userDF.printSchema() // root // |-- name_user: string (nullable = true) // |-- city_user: string (nullable = true) diff --git a/docs/quickstart.md b/docs/quickstart.md index dc699040..5c433c8e 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -28,7 +28,7 @@ _Maven_ Doric is committed to use the most modern APIs first. * The latest stable version of doric is 0.0.7. -* The latest experimental version of doric is 0.0.0+1-6aeb249f-SNAPSHOT. +* The latest experimental version of doric is 0.0.0+1-065b3172-SNAPSHOT. 
 * Doric is compatible with the following Spark versions:
 
 | Spark | Scala | Tested | doric |
@@ -85,7 +85,7 @@ It's only when we try to construct the DataFrame that an exception is raised at
 ```scala
 df
 // org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(value * true)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("INT" and "BOOLEAN").;
-// 'Project [unresolvedalias((value#365 * true), Some(org.apache.spark.sql.Column$$Lambda$5049/0x0000000101ba3040@e5859c7))]
+// 'Project [unresolvedalias((value#365 * true), Some(org.apache.spark.sql.Column$$Lambda$5120/0x0000000801bc4040@4ce15909))]
 // +- LocalRelation [value#365]
 //
 // 	at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.dataTypeMismatch(package.scala:73)
@@ -182,7 +182,7 @@ strDf.select(f.col("str").asDoric[String]).show()
 strDf.select((f.col("str") + f.lit(true)).asDoric[String]).show()
 // doric.sem.DoricMultiError: Found 1 error in select
 //   [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(str + true)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("DOUBLE" and "BOOLEAN").;
-//   'Project [unresolvedalias((cast(str#378 as double) + true), Some(org.apache.spark.sql.Column$$Lambda$5049/0x0000000101ba3040@e5859c7))]
+//   'Project [unresolvedalias((cast(str#378 as double) + true), Some(org.apache.spark.sql.Column$$Lambda$5120/0x0000000801bc4040@4ce15909))]
 //   +- Project [value#375 AS str#378]
 //   +- LocalRelation [value#375]
 //
@@ -196,7 +196,7 @@ strDf.select((f.col("str") + f.lit(true)).asDoric[String]).show()
 // 	at repl.MdocSession$MdocApp$$anonfun$2.apply(quickstart.md:76)
 // 	at repl.MdocSession$MdocApp$$anonfun$2.apply(quickstart.md:76)
 // Caused by: org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.BINARY_OP_DIFF_TYPES] Cannot resolve "(str + true)" due to data type mismatch: the left and right operands of the binary operator have incompatible types ("DOUBLE" and "BOOLEAN").;
-// 'Project [unresolvedalias((cast(str#378 as double) + true), Some(org.apache.spark.sql.Column$$Lambda$5049/0x0000000101ba3040@e5859c7))]
+// 'Project [unresolvedalias((cast(str#378 as double) + true), Some(org.apache.spark.sql.Column$$Lambda$5120/0x0000000801bc4040@4ce15909))]
 // +- Project [value#375 AS str#378]
 // +- LocalRelation [value#375]
 //
diff --git a/docs/validations.md b/docs/validations.md
index 29b7fed0..03dc60ec 100644
--- a/docs/validations.md
+++ b/docs/validations.md
@@ -15,7 +15,7 @@ raising a run-time exception:
 // Spark
 List(1,2,3).toDF().select(f.col("id")+1)
 // org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with name `id` cannot be resolved. Did you mean one of the following? [`value`].;
-// 'Project [unresolvedalias(('id + 1), Some(org.apache.spark.sql.Column$$Lambda$5049/0x0000000101ba3040@e5859c7))]
+// 'Project [unresolvedalias(('id + 1), Some(org.apache.spark.sql.Column$$Lambda$5120/0x0000000801bc4040@4ce15909))]
 // +- LocalRelation [value#399]
 //
 // 	at org.apache.spark.sql.errors.QueryCompilationErrors$.unresolvedAttributeError(QueryCompilationErrors.scala:221)