Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 185 additions & 34 deletions core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/add.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,69 +15,78 @@ import org.jetbrains.kotlinx.dataframe.Selector
import org.jetbrains.kotlinx.dataframe.annotations.AccessApiOverload
import org.jetbrains.kotlinx.dataframe.annotations.Interpretable
import org.jetbrains.kotlinx.dataframe.annotations.Refine
import org.jetbrains.kotlinx.dataframe.api.add
import org.jetbrains.kotlinx.dataframe.columns.BaseColumn
import org.jetbrains.kotlinx.dataframe.columns.ColumnAccessor
import org.jetbrains.kotlinx.dataframe.columns.ColumnPath
import org.jetbrains.kotlinx.dataframe.columns.ColumnReference
import org.jetbrains.kotlinx.dataframe.documentation.DocumentationUrls
import org.jetbrains.kotlinx.dataframe.documentation.ExcludeFromSources
import org.jetbrains.kotlinx.dataframe.exceptions.DuplicateColumnNamesException
import org.jetbrains.kotlinx.dataframe.exceptions.UnequalColumnSizesException
import org.jetbrains.kotlinx.dataframe.impl.api.insertImpl
import org.jetbrains.kotlinx.dataframe.impl.columns.resolveSingle
import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API
import kotlin.reflect.KProperty

/*
* `add` operation adds new columns to DataFrame.
*/

// region Add existing columns
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A lot of repetitions here, but probably okay if we don't have to change it often in the future

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMHO it's hard to find exact repetitions that can be replaced with KoDEx templates 😢.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done for add expression description


/**
 * Appends the given [columns] to this [DataFrame] as new top-level columns.
 *
 * The receiver is not modified: the result is a new [DataFrame] whose column list is
 * the original [DataFrame.columns] followed by [columns].
 *
 * For more information: {@include [DocumentationUrls.Add]}.
 *
 * @param columns columns to add.
 * @throws [DuplicateColumnNamesException] if columns in the resulting frame have repeated names.
 * @throws [UnequalColumnSizesException] if columns in the resulting frame have different sizes.
 * @return new [DataFrame] with added columns.
 */
public fun <T> DataFrame<T>.add(vararg columns: AnyBaseCol): DataFrame<T> {
    // Delegate to the Iterable-based overload so both entry points share one implementation.
    val extraColumns: Iterable<AnyBaseCol> = columns.asIterable()
    return addAll(extraColumns)
}

/**
 * Appends every column from [columns] to this [DataFrame] as new top-level columns.
 *
 * The receiver is not modified: a new [DataFrame] is built from the original
 * [DataFrame.columns] followed by the supplied [columns].
 *
 * For more information: {@include [DocumentationUrls.Add]}.
 *
 * @param columns columns to add.
 * @throws [DuplicateColumnNamesException] if columns in the resulting frame have repeated names.
 * @throws [UnequalColumnSizesException] if columns in the resulting frame have different sizes.
 * @return new [DataFrame] with added columns.
 */
public fun <T> DataFrame<T>.addAll(columns: Iterable<AnyBaseCol>): DataFrame<T> {
    // `this.columns()` is the receiver's current column list; the `columns` parameter goes after it.
    val combined: List<AnyBaseCol> = this.columns() + columns
    return dataFrameOf(combined).cast()
}

/**
 * Appends all columns of the given [dataFrames] to this [DataFrame] as new top-level columns.
 *
 * The receiver is not modified: the result is a new [DataFrame] whose column list is
 * the original [DataFrame.columns] followed by the columns taken from [dataFrames].
 *
 * For more information: {@include [DocumentationUrls.Add]}.
 *
 * @param dataFrames dataFrames to get columns from.
 * @throws [DuplicateColumnNamesException] if columns in the resulting frame have repeated names.
 * @throws [UnequalColumnSizesException] if columns in the resulting frame have different sizes.
 * @return new [DataFrame] with added columns.
 */
public fun <T> DataFrame<T>.add(vararg dataFrames: AnyFrame): DataFrame<T> {
    // Delegate to the Iterable-based overload so both entry points share one implementation.
    val frames: Iterable<AnyFrame> = dataFrames.asIterable()
    return addAll(frames)
}

/**
* Creates new [DataFrame] with all columns from given [dataFrames] added to the end of original [DataFrame.columns] list.
* Adds all columns from the given [dataFrames] to the end of this [DataFrame] (at the top level).
*
* Returns a new [DataFrame] with the columns from the specified
* [dataFrames] appended to the original list of [DataFrame.columns].
*
* Original [DataFrame] is not modified.
* For more information: {@include [DocumentationUrls.Add]}.
*
* @param dataFrames dataFrames to get columns from
* @throws [DuplicateColumnNamesException] if columns in expected result have repeated names
* @throws [UnequalColumnSizesException] if columns in expected result have different sizes
* @return new [DataFrame] with added columns
* @param dataFrames dataFrames to get columns from.
* @throws [DuplicateColumnNamesException] if columns in an expected result have repeated names.
* @throws [UnequalColumnSizesException] if columns in an expected result have different sizes.
* @return new [DataFrame] with added columns.
*/
@JvmName("addAllFrames")
public fun <T> DataFrame<T>.addAll(dataFrames: Iterable<AnyFrame>): DataFrame<T> =
Expand Down Expand Up @@ -115,14 +124,50 @@ public interface AddDataRow<out T> : DataRow<T> {
public typealias AddExpression<T, R> = Selector<AddDataRow<T>, R>

/**
* Creates new column using row [expression] and adds it to the end of [DataFrame]
* With an [AddExpression], you define the value that each row in the new column should have.
* This can be based on values from the same row in the original [DataFrame].
*
* Original [DataFrame] is not modified.
* You can also use functions like [prev] and [next] to access other rows, and combine them with
* [newValue][AddDataRow.newValue] to reference values already computed in the new column.
* For example, use `prev().newValue()` to access the new column value from the previous row.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice! very clear

*/
@ExcludeFromSources
internal interface AddExpressionDocs

/**
* Creates a new column using an [AddExpression] and
* adds it to the end of this [DataFrame] (at the top level).
*
* {@include [AddExpressionDocs]}
*
* Returns a new [DataFrame] with the new column appended to the original list of [DataFrame.columns].
*
* ## Example
*
* ```kotlin
* // Add a new column "sum" that contains the sum of values from the "firstValue"
* // and "secondValue" columns for each row.
* val dfWithSum = df.add("sum") { firstValue + secondValue }
*
* // Add a "fibonacci" column with the Fibonacci sequence:
* // for the first two rows, the value is 1;
* // for subsequent rows, it's the sum of the two previous Fibonacci values.
* val dfWithFibonacci = df.add("fibonacci") {
* if (index() < 2) 1
* else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
* }
* ```
*
* For more information: {@include [DocumentationUrls.Add]}.
*
* @param name name for a new column.
* If it is empty, a unique column name will be generated.
* Otherwise, it should be unique for original [DataFrame].
* @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
* Defaults to [Infer.Nulls].
* @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
* @return new [DataFrame] with added column.
*
* @param name name for a new column. If it is empty, a unique column name will be generated. Otherwise, it should be unique for original [DataFrame].
* @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column
* @param expression [AddExpression] that computes column value for every [DataRow]
* @return new [DataFrame] with added column
* @throws DuplicateColumnNamesException if [DataFrame] already contains a column with given [name]
*/
@Refine
Expand All @@ -149,6 +194,34 @@ public inline fun <reified R, T> DataFrame<T>.add(
noinline expression: AddExpression<T, R>,
): DataFrame<T> = add(column.path(), infer, expression)

/**
* Creates a new column using [AddExpression] and inserts it at the specified [ColumnPath].
*
* {@include [AddExpressionDocs]}
*
* For more information: {@include [DocumentationUrls.Add]}.
*
* Returns a new [DataFrame] with the new column inserted at the given [path].
* {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
*
* ## Example
*
* ```kotlin
* // Add a new column "sum" inside the "info" column group (which will be created if it doesn't exist).
* // The column contains the sum of values from the "firstValue" and "secondValue" columns for each row.
* val dfWithSum = df.add(pathOf("info", "sum")) { firstValue + secondValue }
* ```
*
* @param path Target [ColumnPath] for the new column.
* If it points to a nested location,
* intermediate columns will be created if necessary.
* @param infer A value of [Infer] that specifies how to compute the column [type][BaseColumn.type] for the new column.
* Defaults to [Infer.Nulls].
* @param expression An [AddExpression] that computes the column value for every [DataRow] of the new column.
* @return A new [DataFrame] with the added column.
*
* @throws DuplicateColumnNamesException If the [DataFrame] already contains a column at the specified [path].
*/
public inline fun <reified R, T> DataFrame<T>.add(
path: ColumnPath,
infer: Infer = Infer.Nulls,
Expand All @@ -163,6 +236,10 @@ public inline fun <reified R, T> DataFrame<T>.add(

// region Create and add several columns

/**
* Receiver that is used by [add] and [mapToFrame]
* for adding new columns and column groups based on [DataFrame] columns and row values.
*/
public class AddDsl<T>(
@PublishedApi internal val df: DataFrame<T>,
) : ColumnsContainer<T> by df,
Expand Down Expand Up @@ -253,6 +330,43 @@ public class AddDsl<T>(
public infix fun AddGroup<T>.into(column: AnyColumnGroupAccessor): Unit = into(column.name())
}

/**
* Creates new columns using the [AddDsl] builder.
*
* An [AddDsl] allows adding multiple new columns and column groups to a [DataFrame]
* using concise syntax based on the `from` and `into` operations and [AddExpression]s.
*
* Returns a new [DataFrame] with the newly added columns.
*
* ## Example
*
* ```kotlin
* val dfWithAdded = df.add {
* // Add new column "yearOfBirth" computed as 2021 minus value in "age" column
* "yearOfBirth" from { 2021 - age }
*
* // Add column "is adult" with result of age > 18
* age > 18 into "is adult"
*
* // Add new column "role" using expression
* expr { if (department == "IT") "developer" else "analyst" } into "role"
*
* // Add column group "details"
* group("details") {
* // Add column "last name length" with length of lastName
* name.lastName.length() into "last name length"
*
* // Add column "full name" by combining firstName and lastName
* "full name" from { name.firstName + " " + name.lastName }
* }
* }
* ```
*
* For more information: {@include [DocumentationUrls.Add]}.
*
* @param body An [AddDsl] expression used to define new columns and column groups.
* @return A new [DataFrame] with the added columns.
*/
@Refine
@Interpretable("AddWithDsl")
public fun <T> DataFrame<T>.add(body: AddDsl<T>.() -> Unit): DataFrame<T> {
Expand All @@ -261,6 +375,43 @@ public fun <T> DataFrame<T>.add(body: AddDsl<T>.() -> Unit): DataFrame<T> {
return dataFrameOf(this@add.columns() + dsl.columns).cast()
}

/**
* Creates a new column using an [AddExpression] and
* adds it to the end of each group (i.e., each [DataFrame]) of this [GroupBy] (at the top level).
*
* {@include [AddExpressionDocs]}
*
* Returns a new [GroupBy] in which the new column is appended
* to the original list of [DataFrame.columns] of each group [DataFrame].
*
* ## Example
*
* ```kotlin
* // Add a new column "sum" that contains the sum of values from the "firstValue"
* // and "secondValue" columns for each row.
* val gbWithSum = gb.add("sum") { firstValue + secondValue }
*
* // Add a "fibonacci" column with the Fibonacci sequence:
* // for the first two rows, the value is 1;
* // for subsequent rows, it's the sum of the two previous Fibonacci values.
* val gbWithFibonacci = gb.add("fibonacci") {
* if (index() < 2) 1
* else prev()!!.newValue<Int>() + prev()!!.prev()!!.newValue<Int>()
* }
* ```
*
* For more information: {@include [DocumentationUrls.Add]}.
*
* @param name name for a new column.
* If it is empty, a unique column name will be generated.
* Otherwise, it should be unique for original group [DataFrame]s.
* @param infer a value of [Infer] that specifies how to compute column [type][BaseColumn.type] for a new column.
* Defaults to [Infer.Nulls].
* @param expression [AddExpression] that computes column value for every [DataRow] of a new column.
* @return new [GroupBy] with added column.
*
* @throws DuplicateColumnNamesException if the group [DataFrame]s already contain a column with given [name].
*/
@Refine
@Interpretable("GroupByAdd")
public inline fun <reified R, T, G> GroupBy<T, G>.add(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ public fun <T, C> GroupClause<T, C>.into(column: ColumnsSelectionDsl<T>.(ColumnW
* Groups columns, previously selected with [group], into a new or existing column group
* within the [DataFrame] by specifying its path via [ColumnsSelectionDsl] expression.
*
* If the specified path refers to a non-existent column group, it will be created automatically,
* including any missing intermediate segments.
* {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
*
* See [Selecting Columns][SelectingColumns].
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,8 @@ public fun <T> DataFrame<T>.moveToEnd(vararg columns: KProperty<*>): DataFrame<T
/**
* Moves columns, previously selected with [move] into a new position specified by a
* given column path within the [DataFrame].
* If there are non-existent column groups on this path, they will be created.
*
* {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
*
* See [Selecting Columns][SelectingColumns].
*
Expand Down Expand Up @@ -417,7 +418,8 @@ public fun <T, C> MoveClause<T, C>.into(column: String): DataFrame<T> = pathOf(c
* Moves columns, previously selected with [move] into a new position specified by a
* given column path within the [DataFrame].
* Provides selected column indices.
* If there are non-existent column groups on this path, they will be created.
*
* {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
*
* See [Selecting Columns][SelectingColumns].
*
Expand Down Expand Up @@ -480,7 +482,8 @@ public fun <T, C> MoveClause<T, C>.under(column: AnyColumnGroupAccessor): DataFr
* Moves columns, previously selected with [move] under a new or
* an existing column group specified by a
* column path within the [DataFrame].
* If there are non-existent column groups on this path, they will be created.
*
* {@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}
*
* See [Selecting Columns][SelectingColumns].
*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package org.jetbrains.kotlinx.dataframe.documentation

// Documentation-only marker interface: its KDoc is a shared snippet that other KDocs reference
// with `{@include [org.jetbrains.kotlinx.dataframe.documentation.ColumnPathCreation]}`.
// NOTE(review): presumably the text is inlined as-is at each include site, so keep it self-contained — confirm.
/**
* If the specified path is partially or fully missing — that is, if any segment of the path
* does not correspond to an existing column or column group — all missing parts will be created automatically.
*/
@ExcludeFromSources
internal interface ColumnPathCreation
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,7 @@ internal interface DocumentationUrls {

/** [See `convert` on the documentation website.]({@include [Url]}/convert.html) */
interface Convert

/** [See `add` on the documentation website.]({@include [Url]}/add.html) */
interface Add
}