Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ public annotation class HasSchema(val schemaArg: Int)
* Needed because some function calls only serve as part of the overall compile-time DataSchema evaluation.
* There's no need to update the return type of such calls.
*/
internal annotation class Interpretable(val interpreter: String)
public annotation class Interpretable(val interpreter: String)

/**
* The compiler plugin will replace the return type of calls to the annotated function.
*/
internal annotation class Refine
public annotation class Refine

internal annotation class OptInRefine

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ private interface CommonFillNullsFunctionDoc
* @include [SelectingColumns.Dsl.WithExample] {@include [SetFillNullsOperationArg]}
* @include [Update.DslParam]
*/
@Interpretable("FillNulls0")
public fun <T, C> DataFrame<T>.fillNulls(columns: ColumnsSelector<T, C?>): Update<T, C?> {
    // Start an update over the selected columns, then narrow it to the cells whose value is null.
    val selected = update(columns)
    return selected.where { value -> value == null }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowExpression
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand All @@ -18,6 +20,7 @@ import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
import org.jetbrains.kotlinx.dataframe.impl.ColumnNameGenerator
import org.jetbrains.kotlinx.dataframe.impl.DataFrameImpl
import org.jetbrains.kotlinx.dataframe.impl.UNNAMED_COLUMN_PREFIX
import org.jetbrains.kotlinx.dataframe.impl.api.withValuesImpl
import org.jetbrains.kotlinx.dataframe.impl.asList
import org.jetbrains.kotlinx.dataframe.impl.columnName
import org.jetbrains.kotlinx.dataframe.impl.columns.ColumnAccessorImpl
Expand Down Expand Up @@ -268,6 +271,7 @@ public fun dataFrameOf(vararg header: ColumnReference<*>): DataFrameBuilder = Da

public fun dataFrameOf(vararg columns: AnyBaseCol): AnyFrame {
    // View the vararg array as an Iterable and delegate to the overload that accepts it.
    val iterableColumns = columns.asIterable()
    return dataFrameOf(iterableColumns)
}

@Interpretable("DataFrameOf0")
public fun dataFrameOf(vararg header: String): DataFrameBuilder {
    // Copy the vararg names into a list and delegate to the overload that accepts it.
    val names = header.toList()
    return dataFrameOf(names)
}

public inline fun <reified C> dataFrameOf(vararg header: String, fill: (String) -> Iterable<C>): AnyFrame =
Expand Down Expand Up @@ -302,27 +306,15 @@ public class DataFrameBuilder(private val header: List<String>) {
}.toDataFrame()
}

@Refine
@Interpretable("DataFrameBuilderInvoke0")
public operator fun invoke(vararg values: Any?): AnyFrame {
    // The vararg array is passed on as an Iterable; withValues does the actual frame construction.
    val cells = values.asIterable()
    return withValues(cells)
}

@JvmName("invoke1")
internal fun withValues(values: Iterable<Any?>): AnyFrame {
val list = values.asList()

val ncol = header.size

require(header.isNotEmpty() && list.size.rem(ncol) == 0) {
"Number of values ${list.size} is not divisible by number of columns $ncol"
}

val nrow = list.size / ncol

return (0 until ncol).map { col ->
val colValues = (0 until nrow).map { row ->
list[row * ncol + col]
}
DataColumn.createWithTypeInference(header[col], colValues)
internal fun withValues(values: Iterable<Any?>): AnyFrame =
withValuesImpl(header, values.asList()).map { (name, values) ->
DataColumn.createWithTypeInference(name, values)
}.toDataFrame()
}

public operator fun invoke(args: Sequence<Any?>): AnyFrame {
    // Materialize the sequence so it can be spread into the vararg overload.
    val materialized = args.toList().toTypedArray()
    return invoke(*materialized)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ public inline fun <T, C, reified R> Convert<T, C?>.notNull(
public class Convert<T, out C>(internal val df: DataFrame<T>, internal val columns: ColumnsSelector<T, C>) {
public fun <R> cast(): Convert<T, R> = Convert(df, columns as ColumnsSelector<T, R>)

@Refine
@Interpretable("To0")
public inline fun <reified D> to(): DataFrame<T> {
    // Capture the reified type argument as a KType and delegate to the KType-based overload.
    val targetType = typeOf<D>()
    return to(targetType)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,22 @@ package org.jetbrains.kotlinx.dataframe.api

import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.columns.toColumnSet
import org.jetbrains.kotlinx.dataframe.impl.api.flattenImpl
import kotlin.reflect.KProperty

// region DataFrame

@Refine
@Interpretable("FlattenDefault")
public fun <T> DataFrame<T>.flatten(keepParentNameForColumns: Boolean = false, separator: String = "."): DataFrame<T> {
    // Delegates to the selector-based overload with a selector that picks `all()` columns.
    return flatten(
        keepParentNameForColumns = keepParentNameForColumns,
        separator = separator,
    ) { all() }
}

@Refine
@Interpretable("Flatten0")
public fun <T, C> DataFrame<T>.flatten(
keepParentNameForColumns: Boolean = false,
separator: String = ".",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,13 @@ public abstract class CreateDataFrameDsl<T> : TraversePropertiesDsl {
public abstract operator fun String.invoke(builder: CreateDataFrameDsl<T>.() -> Unit)
}

/**
 * Converts this [Iterable] into a [DataFrame] with a single column named [columnName],
 * where each element of the iterable is used as-is as the value of that column.
 *
 * @param columnName name of the single column to create.
 */
@Refine
@Interpretable("ToDataFrameColumn")
public inline fun <reified T> Iterable<T>.toDataFrame(columnName: String): DataFrame<*> {
    return toDataFrame {
        // Identity mapping: the element itself becomes the cell value.
        columnName from { element -> element }
    }
}

// endregion

// region toDataFrame overloads for built-in types
Expand Down Expand Up @@ -304,6 +311,8 @@ public interface ValueProperty<T> {
public val value: T
}

// endregion

// region Create DataFrame from Map

public fun Map<String, Iterable<Any?>>.toDataFrame(): AnyFrame =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import org.jetbrains.kotlinx.dataframe.DataFrameExpression
import org.jetbrains.kotlinx.dataframe.DataRow
import org.jetbrains.kotlinx.dataframe.RowColumnExpression
import org.jetbrains.kotlinx.dataframe.RowValueFilter
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.Update.Grammar
import org.jetbrains.kotlinx.dataframe.columns.ColumnGroup
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
Expand Down Expand Up @@ -273,7 +275,9 @@ public typealias UpdateExpression<T, C, R> = AddDataRow<T>.(C) -> R
* - {@include [SeeAlsoPerRowCol]}
* @param [expression] The {@include [ExpressionsGivenRow.RowValueExpressionLink]} to update the rows with.
*/
public fun <T, C> Update<T, C>.with(expression: UpdateExpression<T, C, C?>): DataFrame<T> =
@Refine
@Interpretable("UpdateWith0")
public fun <T, C, R : C?> Update<T, C>.with(expression: UpdateExpression<T, C, R>): DataFrame<T> =
Copy link
Collaborator

@Jolanrensen Jolanrensen Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my testrun of the compiler plugin I now cannot use update {}.with {} anymore, just fillNulls {}.with {}. It gives

[NONE_APPLICABLE] None of the following candidates is applicable:
val DataRow<Into_93I>.age: Int?
val ColumnsContainer<Into_93I>.age: DataColumn<Int?>

when trying to access the updated column. Is this intended for now?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, yes. The issue is that the plugin fails to interpret update { }.with { } (update is not supported) and falls back to an empty schema. Will fix.

updateImpl { row, _, value ->
expression(row, value)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,8 @@ internal fun guessValueType(values: Sequence<Any?>, upperBound: KType? = null, l
collectionClasses.add(it.javaClass.kotlin)
}

is Function<*> -> classes.add(Function::class)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not :) Maybe we should change the rendering for functions in dataframes though. After a quick test I found it looks like:

⌌---------------------------------------------------------------------------------------------------------------⌍
| | a:Function<*>| b:Int|
|--|-----------------------------------------------------------------------------------------------------|------|
| 0| org.jetbrains.kotlinx.dataframe.testSets.person.DataFrameTests$$Lambda$60/0x000000010013a040@64ee819| 2|
⌎---------------------------------------------------------------------------------------------------------------⌏
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sadly, toString is weird for such lambda objects. I tried to look at the object in the debugger, but there's literally nothing that hints at the signature or anything else useful.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm you'd think there was a way in kotlin to detect it's a () -> Int or something :/

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually...

image

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It already renders correctly often!

image

might just be a fluke in the tests if the lambda is serialized as interface

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what kernel version do you use?
image

Copy link
Collaborator

@Jolanrensen Jolanrensen Aug 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I run the dev version of this PR's branch in the notebook. (so publish to maven local and use v=0.14.0-dev)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, ok, so the fix is needed anyway


else -> classes.add(it.javaClass.kotlin)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package org.jetbrains.kotlinx.dataframe.impl.api

/**
 * Splits a flat, row-major list of [values] into named columns.
 *
 * The value at index `row * header.size + col` is assigned to the column named `header[col]`.
 *
 * @param header column names; must not be empty.
 * @param values cell values listed row by row; the size must be a multiple of `header.size`.
 * @return one `name to columnValues` pair per header entry, in header order.
 * @throws IllegalArgumentException if [header] is empty, or if the number of [values]
 * is not divisible by the number of columns.
 */
internal fun <T> withValuesImpl(header: List<String>, values: List<T>): List<Pair<String, List<T>>> {
    // Checked on its own so an empty header is reported as such,
    // instead of the misleading "not divisible by number of columns 0".
    require(header.isNotEmpty()) {
        "Header must not be empty: cannot distribute ${values.size} values over 0 columns"
    }

    val ncol = header.size
    require(values.size % ncol == 0) {
        "Number of values ${values.size} is not divisible by number of columns $ncol"
    }

    val nrow = values.size / ncol

    // Column `col` takes every ncol-th value, starting at offset `col`.
    return header.mapIndexed { col, name ->
        name to List(nrow) { row -> values[row * ncol + col] }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import org.junit.Ignore
import org.junit.Test
import java.io.File
import kotlin.reflect.KProperty
import kotlin.reflect.typeOf

Expand Down Expand Up @@ -452,4 +453,11 @@ class CreateDataFrameTests {
df.participants[0].city
}
}

// A list of plain objects becomes a single-column frame; the column holds the objects themselves.
@Test
fun toDataFrameColumn() {
    val expected = File("data.csv")
    val frame = listOf(File("data.csv")).toDataFrame(columnName = "files")
    val firstCell = frame["files"][0]
    firstCell shouldBe expected
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import org.jetbrains.kotlinx.dataframe.explainer.TransformDataFrameExpressions
import org.jetbrains.kotlinx.dataframe.kind
import org.jetbrains.kotlinx.dataframe.type
import org.junit.Test
import java.io.File
import kotlin.reflect.typeOf

class Create : TestBase() {
Expand Down Expand Up @@ -456,4 +457,13 @@ class Create : TestBase() {
peek(dataFrameOf(col), dataFrameOf(col))
// SampleEnd
}

/**
 * Wraps a list of [File] objects into a DataFrame through `toDataFrame(columnName = "data")`.
 *
 * NOTE(review): the SampleStart/SampleEnd markers presumably delimit a snippet that is extracted
 * into the documentation — confirm before changing the lines between them.
 */
@Test
@TransformDataFrameExpressions
fun toDataFrameColumn() {
// SampleStart
val files = listOf(File("data.csv"), File("data1.csv"))
val df = files.toDataFrame(columnName = "data")
// SampleEnd
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ class DataFrameTests : BaseTest() {
dataFrameOf("name", "age", "city", "weight")(c1, c2, c3, c4) shouldBe df
}

// A lambda stored as a cell value yields a Function<*> column, and the stored lambda stays callable.
@Test
fun `guess column type for type without classifier`() {
    val frame = dataFrameOf("a", "b")({ 1 }, 2)
    val lambdaColumn = frame["a"]
    lambdaColumn.type() shouldBe typeOf<Function<*>>()

    val storedLambda = lambdaColumn[0] as () -> Int
    storedLambda() shouldBe 1
}

@Test
fun `create with columnOf`() {
val col = columnOf("Alice", "Bob")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import org.jetbrains.kotlinx.dataframe.AnyRow
import org.jetbrains.kotlinx.dataframe.ColumnsSelector
import org.jetbrains.kotlinx.dataframe.DataColumn
import org.jetbrains.kotlinx.dataframe.DataFrame
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.dataFrameOf
import org.jetbrains.kotlinx.dataframe.api.forEach
import org.jetbrains.kotlinx.dataframe.api.select
Expand Down Expand Up @@ -143,6 +145,8 @@ public fun DataFrame.Companion.readExcel(
* @param nameRepairStrategy handling of column names.
* The default behavior is [NameRepairStrategy.CHECK_UNIQUE]
*/
@Refine
@Interpretable("ReadExcel")
public fun DataFrame.Companion.readExcel(
fileOrUrl: String,
sheetName: String? = null,
Expand Down Expand Up @@ -209,7 +213,9 @@ public fun DataFrame.Companion.readExcel(
* @param range comma separated list of Excel column letters and column ranges (e.g. “A:E” or “A,C,E:F”)
*/
@JvmInline
public value class StringColumns(public val range: String)
public value class StringColumns
@Interpretable("StringColumns")
constructor(public val range: String)

public fun StringColumns.toFormattingOptions(formatter: DataFormatter = DataFormatter()): FormattingOptions {
    // Pair the wrapped column-range string with the formatter (a fresh DataFormatter by default).
    val columnRange = this.range
    return FormattingOptions(columnRange, formatter)
}
Expand Down
Loading