interface DataFrame
cols |
Returns the ordered list of columns of this data-frame. abstract val cols: List<DataCol> |
names |
Returns the ordered list of column names of this data-frame. abstract val names: List<String> |
ncol |
abstract val ncol: Int |
nrow |
abstract val nrow: Int |
rows |
abstract val rows: Iterable<DataFrameRow> |
addColumn |
Adds new variables and preserves existing. abstract fun addColumn(tf: ColumnFormula): DataFrame open fun addColumn(columnName: String, expression: TableExpression): DataFrame |
addColumns |
open fun addColumns(vararg columnFormulas: ColumnFormula): DataFrame |
addRow |
Returns a DataFrame containing the new row. The new row must be a list whose length matches the number of columns in the DataFrame. open fun addRow(row: List<Any?>): DataFrame |
filter |
abstract fun filter(predicate: VectorizedRowPredicate): DataFrame |
filterByRow |
open fun filterByRow(rowFilter: DataFrameRow.(DataFrameRow) -> Boolean): DataFrame |
get |
Returns a column by name. abstract operator fun get(columnName: String): DataCol
Returns a column by index. open operator fun get(columnIndex: Int): DataCol |
groupBy |
Creates a grouped data-frame given a list of grouping attributes. abstract fun groupBy(vararg by: String): DataFrame |
groupedBy |
Returns a data-frame of distinct grouping variable tuples for a grouped data-frame. An empty data-frame for ungrouped data. abstract fun groupedBy(): DataFrame |
groups |
Returns the groups of a grouped data frame or just a reference to this object if not. abstract fun groups(): List<DataFrame> |
remove |
Remove selected columns. open fun remove(vararg columns: String): DataFrame open fun remove(columns: Iterable<String>): DataFrame open fun remove(vararg columSelects: ColumnSelector): DataFrame
Remove selected columns using a predicate open fun remove(columnSelect: ColumnSelector): DataFrame |
removeIf |
Select or remove columns by predicate. open fun removeIf(colSelector: (DataCol) -> Boolean): DataFrame |
row |
Returns a row by index abstract fun row(rowIndex: Int): DataFrameRow |
select |
Create a new data frame with only the selected columns. abstract fun select(vararg columns: String): DataFrame open fun select(vararg columns: ColumnSelector): DataFrame
Convenience wrapper to work with the vararg krangl.DataFrame.select. open fun select(columns: Iterable<String>): DataFrame
Keeps only the variables that match any of the given expressions. open fun select(columnSelect: ColumnSelector): DataFrame |
selectIf |
Select or remove columns by predicate. open fun selectIf(colSelector: (DataCol) -> Boolean): DataFrame |
sortedBy |
Resorts the receiver in ascending order (small values to go top of table). The first argument defines the primary attribute to sort by. Additional ones are used to resolve ties. abstract fun sortedBy(vararg by: String): DataFrame |
sortedByDescending |
Resorts the receiver in descending order (small values to go bottom of table). The first argument defines the primary attribute to sort by. Additional ones are used to resolve ties. open fun sortedByDescending(vararg by: String): DataFrame |
summarize |
Creates a summary of a table or a group. The provided expression is expected to evaluate to a scalar value and not into a column.
abstract fun summarize(vararg sumRules: ColumnFormula): DataFrame |
transmute |
Create a new dataframe based on a list of column-formulas which are evaluated in the context of this instance. open fun transmute(vararg formula: ColumnFormula): DataFrame |
ungroup |
Removes the grouping (if present) from a data frame. abstract fun ungroup(): DataFrame |
addRowNumber |
Add the row-number as column to a data-frame. fun DataFrame.addRowNumber(name: String = "row_number"): DataFrame |
asString |
fun DataFrame.asString(title: String = "A DataFrame", colNames: Boolean = true, maxRows: Int = PRINT_MAX_ROWS, maxWidth: Int = PRINT_MAX_WIDTH, maxDigits: Int = PRINT_MAX_DIGITS, rowNumbers: Boolean = PRINT_ROW_NUMBERS): String |
bindRows |
Adds new rows. Missing entries are set to null. The output of bindRows() will contain a column if that column appears in any of the inputs. fun DataFrame.bindRows(df: DataFrame): DataFrame fun DataFrame.bindRows(vararg someRows: DataFrameRow): DataFrame |
complete |
Turns implicit missing values into explicit missing values. This is a wrapper around expand(), dplyr::left_join() and replace_na() that's useful for completing missing combinations of data. fun DataFrame.complete(vararg columnNames: String): DataFrame |
count |
Counts observations by group. fun DataFrame.count(vararg selects: String, name: String = "n"): DataFrame |
countExpr |
Counts expressions fun DataFrame.countExpr(vararg moreExpressions: TableExpression, name: String = "n", tableExpression: TableExpression? = null): DataFrame |
distinct |
Retain only unique/distinct rows from an input tbl. fun DataFrame.distinct(vararg selects: String = this.names.toTypedArray()): DataFrame |
expand |
expand() is often useful in conjunction with left_join if you want to convert implicit missing values to explicit missing values. Or you can use it in conjunction with anti_join() to figure out which combinations are missing. fun DataFrame.expand(vararg columnNames: String): DataFrame |
filter |
Filter the rows of a table with a single predicate. fun DataFrame.filter(predicate: ExpressionContext.(ExpressionContext) -> List<Boolean?>): DataFrame
AND-filter a table with different filters. fun DataFrame.filter(vararg predicates: DataFrame.(DataFrame) -> List<Boolean>): DataFrame |
flatten |
Unfold all list columns vertically and all objects properties horizontally. fun DataFrame.flatten(): DataFrame |
gather |
Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use gather() when you notice that you have columns that are not variables. fun DataFrame.gather(key: String, value: String, columns: List<String> = this.names, convert: Boolean = false): DataFrame fun DataFrame.gather(key: String, value: String, columns: ColumnSelector, convert: Boolean = false): DataFrame |
groupBy |
Creates a grouped data-frame from a column selector function. See fun DataFrame.groupBy(columnSelect: ColumnSelector): DataFrame |
groupByExpr |
Creates a grouped data-frame from one or more table expressions. See fun DataFrame.groupByExpr(vararg moreExpressions: TableExpression, tableExpression: TableExpression? = null): DataFrame |
head |
fun DataFrame.head(numRows: Int = 5): DataFrame |
innerJoin |
fun DataFrame.innerJoin(right: DataFrame, by: String, suffices: Pair<String, String> = ".x" to ".y"): DataFrame fun DataFrame.innerJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
leftJoin |
Convenience wrapper around joinLeft that works with a single by attribute. fun DataFrame.leftJoin(right: DataFrame, by: String, suffices: Pair<String, String> = ".x" to ".y"): DataFrame fun DataFrame.leftJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
moveLeft |
Push some columns to the left end of a data-frame. fun DataFrame.moveLeft(vararg columnNames: String): DataFrame |
moveRight |
Push some columns to the right end of a data-frame. fun DataFrame.moveRight(vararg columnNames: String): DataFrame |
nest |
Nest repeated values in a list-variable. fun DataFrame.nest(colSelect: ColumnSelector = { except(*groupedBy().names.toTypedArray()) }, columnName: String = DEF_NEST_COLUMN_NAME): DataFrame |
oneHot |
Performs a one-hot encoding of the specified column. fun <T> DataFrame.oneHot(columnName: String, naValue: String = "NA", categorizeWith: (T?) -> String? = { it?.toString() }): DataFrame
Performs a one-hot encoding of the specified column. fun DataFrame.oneHot(columnName: String): DataFrame |
outerJoin |
fun DataFrame.outerJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right)): DataFrame |
print |
fun DataFrame.print(title: String = "A DataFrame", colNames: Boolean = true, maxRows: Int = PRINT_MAX_ROWS, maxWidth: Int = PRINT_MAX_WIDTH, maxDigits: Int = PRINT_MAX_DIGITS, rowNumbers: Boolean = PRINT_ROW_NUMBERS): Unit |
printDataClassSchema |
Provides code to convert a dataframe to a strongly typed list of kotlin data-class instances. fun DataFrame.printDataClassSchema(dataClassName: String, receiverVarName: String = "dataFrame"): Unit |
remove |
Remove columns by column type fun <T : DataCol> DataFrame.remove(): DataFrame |
rename |
fun DataFrame.rename(vararg old2new: Pair<String, String>): DataFrame
Rename one or several columns. Positions should be preserved fun DataFrame.rename(vararg old2new: RenameRule): DataFrame |
rowsAs |
Convert rows into objects by using reflection. Only parameters used in constructor will be mapped. Note: This is tested with kotlin data classes only. File a ticket for better type compatibility or any issues! fun <T> DataFrame.rowsAs(mapping: Map<String, String> = names.map { it to it }.toMap()): Iterable<T> |
rowwise |
Creates a grouped data-frame where each group consists of exactly one line. Thereby the row-number is used as a group-hash. fun DataFrame.rowwise(): DataFrame |
sampleFrac |
Select random rows from a table. If receiver is grouped, sampling is done per group. fun DataFrame.sampleFrac(fraction: Double, replace: Boolean = false): DataFrame |
sampleN |
Select random rows from a table. If receiver is grouped, sampling is done per group. fun DataFrame.sampleN(n: Int, replace: Boolean = false): DataFrame |
schema |
Prints the schema (that is column names, types, and the first few values per column) of a dataframe to stdout. fun DataFrame.schema(maxDigits: Int = 3, maxWidth: Int = PRINT_MAX_WIDTH): Unit |
select |
Select columns by column type fun <T : DataCol> DataFrame.select(): DataFrame |
semiJoin |
fun DataFrame.semiJoin(right: DataFrame, by: String): DataFrame fun DataFrame.semiJoin(right: DataFrame, by: Iterable<Pair<String, String>>): DataFrame fun DataFrame.semiJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
separate |
Given either regular expression or a vector of character positions, separate() turns a single character column into multiple columns. fun DataFrame.separate(column: String, into: List<String>, sep: String = "[^\\w]", remove: Boolean = true, convert: Boolean = false): DataFrame |
setNames |
Replace current column names with new ones. The number of provided names must match the number of columns. fun DataFrame.setNames(vararg newNames: String): DataFrame |
shuffle |
Randomize the row order of a data-frame. fun DataFrame.shuffle(): DataFrame |
slice |
Select rows by position while taking into account grouping in a data-frame. fun DataFrame.slice(vararg slices: Int): DataFrame fun DataFrame.slice(slice: IntRange): DataFrame |
sortedBy |
fun DataFrame.sortedBy(sortExpression: SortExpression): DataFrame fun DataFrame.sortedBy(vararg sortExpressions: SortExpression): DataFrame |
spread |
Spread a key-value pair across multiple columns. fun DataFrame.spread(key: String, value: String, fill: Any? = null, convert: Boolean = false): DataFrame |
summarize |
fun DataFrame.summarize(name: String, tableExpression: TableExpression): DataFrame |
summarizeAt |
fun DataFrame.summarizeAt(columnSelect: ColumnSelector, vararg aggfuns: AggFun): DataFrame fun DataFrame.summarizeAt(columnSelect: ColumnSelector, op: (SummarizeBuilder.() -> Unit)? = null): DataFrame |
tail |
fun DataFrame.tail(numRows: Int = 5): DataFrame |
take |
fun DataFrame.take(numRows: Int = 5): DataFrame |
takeLast |
fun DataFrame.takeLast(numRows: Int): DataFrame |
toDoubleMatrix |
fun DataFrame.toDoubleMatrix(): Array<DoubleArray> |
toFloatMatrix |
fun DataFrame.toFloatMatrix(): Array<FloatArray> |
toMap |
Expose a view on the data as map from column names to nullable arrays. fun DataFrame.toMap(): Map<String, Array<*>> |
unfold |
fun <T> DataFrame.unfold(columnName: String, properties: List<String> = detectPropertiesByReflection<T>().map { it.name }, keep: Boolean = true): DataFrame |
unite |
Convenience function to paste together multiple columns into one. fun DataFrame.unite(colName: String, which: List<String>, sep: String = "_", remove: Boolean = true): DataFrame fun DataFrame.unite(colName: String, vararg which: ColNames.() -> List<Boolean?>, sep: String = "_", remove: Boolean = true): DataFrame |
unnest |
If you have a list-column, this makes each element of the list its own row. It unfolds data vertically. unnest() can handle list-columns that contain atomic vectors, lists, or data frames (but not a mixture of the different types). fun DataFrame.unnest(columnName: String): DataFrame |
writeCSV |
fun DataFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT.withHeader(*names.toTypedArray())): Unit |
writeExcel |
fun DataFrame.writeExcel(filePath: String, sheetName: String, headers: Boolean = true, eraseFile: Boolean = false, boldHeaders: Boolean = true): Unit |
writeTSV |
fun DataFrame.writeTSV(file: File, format: CSVFormat = CSVFormat.TDF.withHeader(*names.toTypedArray())): Unit |
builder |
Create a new data frame in place. fun DataFrame.Companion.builder(vararg header: String): InplaceDataFrameBuilder |
fromJson |
fun DataFrame.Companion.fromJson(file: File): DataFrame fun DataFrame.Companion.fromJson(fileOrUrl: String): DataFrame fun DataFrame.Companion.fromJson(url: URL): DataFrame |
fromJsonString |
fun DataFrame.Companion.fromJsonString(jsonData: String): DataFrame |
fromRecords |
Create a data-frame from a list of objects fun <T> DataFrame.Companion.fromRecords(records: Iterable<T>, mapping: (T) -> DataFrameRow): DataFrame |
fromResultSet |
fun DataFrame.Companion.fromResultSet(rs: ResultSet): DataFrame |
readCSV |
fun DataFrame.Companion.readCSV(fileOrUrl: String, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame |
readDelim |
fun DataFrame.Companion.readDelim(uri: URI, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), isCompressed: Boolean = uri.toURL().toString().endsWith(".gz"), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readDelim(inStream: InputStream, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), isCompressed: Boolean = false, colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readDelim(reader: Reader, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf(), skip: Int = 0): DataFrame |
readExcel |
Returns a DataFrame with the contents from an Excel file sheet fun DataFrame.Companion.readExcel(path: String, sheet: Int = 0, cellRange: CellRangeAddress? = null, colTypes: Map<String, ColType> = mapOf(), trim_ws: Boolean = false, guessMax: Int = 100, na: String = MISSING_VALUE, stopAtBlankLine: Boolean = true, includeBlankLines: Boolean = false): DataFrame fun DataFrame.Companion.readExcel(path: String, sheet: String, cellRange: CellRangeAddress? = null, colTypes: Map<String, ColType> = mapOf(), trim_ws: Boolean = false, guessMax: Int = 100, na: String = MISSING_VALUE, stopAtBlankLine: Boolean = true, includeBlankLines: Boolean = false): DataFrame |
readTSV |
fun DataFrame.Companion.readTSV(fileOrUrl: String, format: CSVFormat = CSVFormat.TDF.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readTSV(file: File, format: CSVFormat = CSVFormat.TDF.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame |