Skip to content

Commit

Permalink
Merge pull request #10 from brudaswen/feature/issue-9
Browse files Browse the repository at this point in the history
Support `ignoreUnknownColumns`
  • Loading branch information
brudaswen authored Nov 8, 2020
2 parents b54f67f + 7304013 commit 5505bf0
Show file tree
Hide file tree
Showing 11 changed files with 230 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
### Added
- Support `ignoreUnknownColumns`.

## [1.0.2] - 2020-10-11
### Changed
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ CSV serialization and parsing options can be changed by providing a custom `CsvC
| `ignoreEmptyLines` | `true` | Ignore empty lines during parsing. |
| `hasHeaderRecord` | `false` | First line is header record. |
| `headerSeparator` | `.` | Character that is used to separate hierarchical header names. |
| `ignoreUnknownColumns` | `false` | Ignore unknown columns (only has effect when `hasHeaderRecord` is enabled). |
| `hasTrailingDelimiter` | `false` | If records end with a trailing `delimiter`. |

## Requirements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package kotlinx.serialization.csv
* @param ignoreEmptyLines Ignore empty lines during parsing (default: `true`).
* @param hasHeaderRecord First line is header record (default: `false`).
* @param headerSeparator Character that is used to separate hierarchical header names (default: `.`).
* @param ignoreUnknownColumns Ignore unknown columns when `hasHeaderRecord` is enabled (default: `false`).
* @param hasTrailingDelimiter If records end with a trailing [delimiter] (default: `false`).
*/
data class CsvConfiguration(
Expand All @@ -24,7 +25,8 @@ data class CsvConfiguration(
val ignoreEmptyLines: Boolean = true,
val hasHeaderRecord: Boolean = false,
val headerSeparator: Char = '.',
val hasTrailingDelimiter: Boolean = false
val ignoreUnknownColumns: Boolean = false,
val hasTrailingDelimiter: Boolean = false,
) {

init {
Expand Down
43 changes: 43 additions & 0 deletions library/src/main/kotlin/kotlinx/serialization/csv/CsvException.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
@file:Suppress("FunctionName")

package kotlinx.serialization.csv

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerializationException
import kotlinx.serialization.descriptors.SerialDescriptor

/**
 * Signals a problem with CSV serialization or deserialization.
 *
 * Common base class of [CsvEncodingException] and [CsvDecodingException].
 *
 * @param message Description of the problem.
 */
internal open class CsvException(
    message: String,
) : SerializationException(message)

/**
 * Raised when [Csv] could not produce a CSV string from the given value.
 *
 * @param message Description of the encoding problem.
 */
internal class CsvEncodingException(
    message: String,
) : CsvException(message)

/**
 * Creates (does not throw) the exception reporting that the kind of [descriptor] is not supported by CSV.
 *
 * @param descriptor Descriptor whose [SerialDescriptor.kind] is put into the message.
 * @return A [CsvEncodingException] carrying the message.
 */
@OptIn(ExperimentalSerializationApi::class)
internal fun UnsupportedSerialDescriptorException(descriptor: SerialDescriptor): CsvEncodingException {
    return CsvEncodingException("CSV does not support '${descriptor.kind}'.")
}

/**
 * Creates (does not throw) the exception reporting that CSV headers are not supported for the
 * variable sized kind of [descriptor].
 *
 * @param descriptor Descriptor whose [SerialDescriptor.kind] is put into the message.
 * @return A [CsvEncodingException] carrying the message.
 */
@OptIn(ExperimentalSerializationApi::class)
internal fun HeadersNotSupportedForSerialDescriptorException(descriptor: SerialDescriptor): CsvEncodingException {
    return CsvEncodingException("CSV headers are not supported for variable sized type '${descriptor.kind}'.")
}

/**
 * Raised when [Csv] could not parse the given CSV string or could not deserialize it to the target class.
 *
 * @param message Description of the decoding problem.
 */
internal class CsvDecodingException(
    message: String,
) : CsvException(message)

/**
 * Builds a [CsvDecodingException] whose message is prefixed with the source [offset] when one is given.
 *
 * @param offset Read offset to mention in the message, or `null` to use [message] unchanged.
 * @param message Description of the decoding problem.
 */
internal fun CsvDecodingException(offset: Int?, message: String): CsvDecodingException {
    val text = when (offset) {
        null -> message
        else -> "Unexpected CSV token at offset $offset: $message"
    }
    return CsvDecodingException(text)
}

/**
 * Creates (does not throw) the exception for a header column that could not be matched.
 *
 * @param offset Read offset at which the header value started; included in the message.
 * @param header The unmatched header value as read from the CSV input.
 */
internal fun UnknownColumnHeaderException(offset: Int, header: String) = CsvDecodingException(
    offset,
    // The closing line must be blank (no '|') so that trimMargin() drops it; a lone '|' would be
    // kept as an empty line and leave a trailing newline at the end of the message.
    """
    |Encountered unknown column header '$header'.
    |Use 'ignoreUnknownColumns = true' in 'Csv {}' builder to ignore unknown columns.
    """.trimMargin()
)
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,23 @@ internal class ClassCsvDecoder(
) : CsvDecoder(csv, reader, parent) {

private var elementIndex = 0
private var columnIndex = 0

override fun decodeElementIndex(descriptor: SerialDescriptor): Int = when {
reader.isDone || elementIndex >= descriptor.elementsCount -> CompositeDecoder.DECODE_DONE
classHeaders != null -> classHeaders[elementIndex]
reader.isDone -> CompositeDecoder.DECODE_DONE
elementIndex >= descriptor.elementsCount -> CompositeDecoder.DECODE_DONE
classHeaders != null && columnIndex >= classHeaders.size -> CompositeDecoder.DECODE_DONE

classHeaders != null ->
when (val result = classHeaders[columnIndex]) {
CompositeDecoder.UNKNOWN_NAME -> {
ignoreColumn()
decodeElementIndex(descriptor)
}
null -> CompositeDecoder.UNKNOWN_NAME
else -> result
}

else -> elementIndex
}

Expand All @@ -42,14 +55,31 @@ internal class ClassCsvDecoder(
}
}

/**
 * Finishes the structure via the superclass and then, if headers are present and
 * `ignoreUnknownColumns` is enabled, skips columns until [columnIndex] reaches the header count.
 */
override fun endStructure(descriptor: SerialDescriptor) {
    super.endStructure(descriptor)

    val knownHeaders = classHeaders
    if (knownHeaders == null || !csv.configuration.ignoreUnknownColumns) return

    // Consume whatever columns are left over after the last decoded element.
    while (columnIndex < knownHeaders.size) {
        ignoreColumn()
    }
}

/**
 * Delegates to the superclass, then counts the finished child structure as one element and one column.
 */
override fun endChildStructure(descriptor: SerialDescriptor) {
    super.endChildStructure(descriptor)
    elementIndex += 1
    columnIndex += 1
}

/**
 * Reads the next column through the superclass and advances both the element and the column counter.
 *
 * @return The column value returned by the superclass.
 */
override fun decodeColumn(): String =
    super.decodeColumn().also {
        elementIndex++
        columnIndex++
    }

/**
 * Reads one column from the reader, discards its value and advances [columnIndex] only
 * (the element counter is left untouched).
 */
private fun ignoreColumn() {
    reader.readColumn()
    columnIndex += 1
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ package kotlinx.serialization.csv.decode
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.csv.Csv
import kotlinx.serialization.csv.CsvConfiguration
import kotlinx.serialization.csv.UnknownColumnHeaderException
import kotlinx.serialization.csv.UnsupportedSerialDescriptorException
import kotlinx.serialization.descriptors.PolymorphicKind
import kotlinx.serialization.descriptors.SerialDescriptor
import kotlinx.serialization.descriptors.StructureKind
Expand All @@ -24,10 +26,10 @@ internal abstract class CsvDecoder(
override val serializersModule: SerializersModule
get() = csv.serializersModule

protected val configuration: CsvConfiguration
private val configuration: CsvConfiguration
get() = csv.configuration

protected var headers: Headers? = null
private var headers: Headers? = null

override fun beginStructure(descriptor: SerialDescriptor): CompositeDecoder {
return when (descriptor.kind) {
Expand All @@ -47,8 +49,7 @@ internal abstract class CsvDecoder(
PolymorphicKind.OPEN ->
ClassCsvDecoder(csv, reader, this, headers)

else ->
error("CSV does not support '${descriptor.kind}'.")
else -> throw UnsupportedSerialDescriptorException(descriptor)
}
}

Expand Down Expand Up @@ -124,9 +125,11 @@ internal abstract class CsvDecoder(
private fun readHeaders(desc: SerialDescriptor, prefix: String): Headers {
val headers = Headers()
var position = 0
while (reader.isFirstRecord) {
// Read header value and check if it (still) starts with required prefix
while (!reader.isDone && reader.isFirstRecord) {
val offset = reader.offset
reader.mark()

// Read header value and check if it (still) starts with required prefix
val value = reader.readColumn()
if (!value.startsWith(prefix)) {
reader.reset()
Expand All @@ -151,8 +154,13 @@ internal abstract class CsvDecoder(
} else {
reader.unmark()
}
} else {
} else if (csv.configuration.ignoreUnknownColumns) {
headers[position] = CompositeDecoder.UNKNOWN_NAME
reader.unmark()
} else if (value == "" && !reader.isFirstRecord && configuration.hasTrailingDelimiter) {
reader.unmark()
} else {
throw UnknownColumnHeaderException(offset, value)
}
}
position++
Expand All @@ -176,8 +184,11 @@ internal abstract class CsvDecoder(
private val map = mutableMapOf<Int, Int>()
private val subHeaders = mutableMapOf<Int, Headers>()

val size
get() = map.size

operator fun get(position: Int) =
map.getOrElse(position) { CompositeDecoder.UNKNOWN_NAME }
map[position]

operator fun set(key: Int, value: Int) {
map[key] = value
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ import kotlinx.serialization.csv.CsvConfiguration
*/
internal class CsvReader(private val source: Source, private val configuration: CsvConfiguration) {

/** Current read offset as reported by the underlying source. */
val offset: Int
    get() {
        return source.offset
    }

var recordNo = 0
private set

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ package kotlinx.serialization.csv.decode
*/
interface Source {

/**
* Current read offset in the source.
*/
val offset: Int

/**
* Check if there are more characters to read.
* @return True if EOF has not been read yet; false if EOF has already been read.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ private const val EOF = -1
*/
internal class StringSource(private val content: String) : Source {

// Exposes the internal read position as the source offset.
override val offset: Int
    get() {
        return position
    }

private var position = 0

private var marks = arrayListOf<Int>()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package kotlinx.serialization.csv.config

import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.builtins.ListSerializer
import kotlinx.serialization.csv.Csv
import kotlinx.serialization.csv.CsvConfiguration
import kotlinx.serialization.csv.records.Data
import kotlinx.serialization.csv.records.IntStringRecord
import kotlinx.serialization.csv.records.Location
import kotlinx.serialization.csv.records.NestedRecord
import kotlinx.serialization.test.assertParse
import kotlinx.serialization.test.assertParseFails
import kotlin.test.Test

/**
 * Parsing tests for [CsvConfiguration.ignoreUnknownColumns] combined with a header record.
 */
@OptIn(ExperimentalSerializationApi::class)
internal class CsvIgnoreUnknownKeysTest {

    /** Format that reads a header record and ignores unknown columns. */
    private val lenientCsv = Csv(
        CsvConfiguration(
            hasHeaderRecord = true,
            ignoreUnknownColumns = true
        )
    )

    /** Format that reads a header record and leaves `ignoreUnknownColumns` at its default. */
    private val strictCsv = Csv(
        CsvConfiguration(
            hasHeaderRecord = true
        )
    )

    @Test
    fun testMultipleColumns() {
        assertParse(
            "a,b,IGNORED\r\n1,testing,ignored",
            IntStringRecord(1, "testing"),
            IntStringRecord.serializer(),
            lenientCsv
        )
    }

    @Test
    fun testMultipleColumns_failure() {
        assertParseFails(
            "a,b,IGNORED\r\n1,testing,ignored",
            IntStringRecord.serializer(),
            strictCsv
        )
    }

    @Test
    fun testMultipleColumnsReordered() {
        assertParse(
            "IGNORED,b,a\r\nignored,testing,1",
            IntStringRecord(1, "testing"),
            IntStringRecord.serializer(),
            lenientCsv
        )
    }

    @Test
    fun testMultipleColumnsReordered_failure() {
        assertParseFails(
            "IGNORED,b,a\r\nignored,testing,1",
            IntStringRecord.serializer(),
            strictCsv
        )
    }

    @Test
    fun testNestedRecordListWithHeaderReordered() {
        // Unknown columns appear at the start, at the end and inside the nested `data.location` group.
        val input =
            """IGNORED,time,name,data.location.lon,data.location.IGNORED,data.location.lat,data.speed,data.info,IGNORED
              |IGNORED,0,Alice,1.0,IGNORED,0.0,100,info,IGNORED
              |IGNORED,1,Bob,20.0,IGNORED,10.0,50,info2,IGNORED
              |""".trimMargin().replace("\n", "\r\n")

        val alice = NestedRecord(
            time = 0,
            name = "Alice",
            data = Data(
                location = Location(
                    lat = 0.0,
                    lon = 1.0
                ),
                speed = 100,
                info = "info"
            )
        )
        val bob = NestedRecord(
            time = 1,
            name = "Bob",
            data = Data(
                location = Location(
                    lat = 10.0,
                    lon = 20.0
                ),
                speed = 50,
                info = "info2"
            )
        )

        assertParse(
            input,
            listOf(alice, bob),
            ListSerializer(NestedRecord.serializer()),
            lenientCsv
        )
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ package kotlinx.serialization.test
import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.KSerializer
import kotlinx.serialization.StringFormat
import kotlinx.serialization.csv.CsvDecodingException
import kotlin.test.assertEquals
import kotlin.test.assertFailsWith

@OptIn(ExperimentalSerializationApi::class)
inline fun <reified T : Any?> assertStringFormAndRestored(
Expand Down Expand Up @@ -38,6 +40,17 @@ inline fun <reified T : Any> assertParse(
assertEquals(expected, restored)
}

/**
 * Asserts that decoding [input] with [serializer] through [format] fails with a [CsvDecodingException].
 *
 * @param input Text handed to the format.
 * @param serializer Serializer of the type the input is decoded to.
 * @param format Format under test.
 */
@OptIn(ExperimentalSerializationApi::class)
inline fun <reified T : Any> assertParseFails(
    input: String,
    serializer: KSerializer<T>,
    format: StringFormat
) {
    assertFailsWith<CsvDecodingException>(
        block = { format.decodeFromString(serializer, input) }
    )
}

@OptIn(ExperimentalSerializationApi::class)
inline fun <reified T : Any> StringFormat.assertStringFormAndRestored(
expected: String,
Expand Down

0 comments on commit 5505bf0

Please sign in to comment.