Contains all parts of the krangl API
AggFun |
data class AggFun |
AnyCol |
class AnyCol : DataCol |
ArrayUtils |
object ArrayUtils |
BooleanCol |
class BooleanCol : DataCol |
ColNames |
class ColNames |
ColSpec |
data class ColSpec |
ColType |
Methods to read and write tables into/from DataFrames. See also https://commons.apache.org/proper/commons-csv/ for other implementations, as well as https://github.com/databricks/spark-csv and https://examples.javacodegeeks.com/core-java/apache/commons/csv-commons/writeread-csv-files-with-apache-commons-csv-example/ enum class ColType |
ColumnFormula |
data class ColumnFormula |
ColumnSelector |
typealias ColumnSelector = ColNames.() -> List<Boolean?> |
DataCol |
abstract class DataCol |
DataFrame |
interface DataFrame |
DataFrameRow |
typealias DataFrameRow = Map<String, Any?> |
DoubleCol |
class DoubleCol : DataCol |
ExpressionContext |
A proxy on the class ExpressionContext : TableContext |
Factor |
class Factor |
InplaceDataFrameBuilder |
class InplaceDataFrameBuilder |
IntCol |
class IntCol : NumberCol |
JoinType |
DataFrame joining enum class JoinType |
LongCol |
class LongCol : NumberCol |
NumberCol |
abstract class NumberCol : DataCol |
RenameRule |
data class RenameRule |
SleepPattern |
data class SleepPattern |
SortExpression |
typealias SortExpression = SortingContext.(SortingContext) -> Any? |
SortingContext |
A proxy on the class SortingContext : TableContext |
StringCol |
class StringCol : DataCol |
SumFormula |
typealias SumFormula = DataCol.(DataCol) -> Any? |
SumFuns |
Common aggregation functions to be used along with `summarizeAt/If/All`. object SumFuns |
SummarizeBuilder |
class SummarizeBuilder |
TableContext |
open class TableContext |
TableExpression |
typealias TableExpression = ExpressionContext.(ExpressionContext) -> Any? |
UnequalByHelpers |
object UnequalByHelpers |
VectorizedRowPredicate |
typealias VectorizedRowPredicate = ExpressionContext.(ExpressionContext) -> BooleanArray |
ColumnTypeCastException |
class ColumnTypeCastException : RuntimeException |
DuplicatedColumnNameException |
class DuplicatedColumnNameException : RuntimeException |
InvalidColumnOperationException |
class InvalidColumnOperationException : RuntimeException |
InvalidColumnSelectException |
class InvalidColumnSelectException : RuntimeException |
InvalidSortingPredicateException |
class InvalidSortingPredicateException : RuntimeException |
MissingValueException |
Thrown if an operation is applied to a column that contains missing values. class MissingValueException : Throwable |
NonScalarValueException |
class NonScalarValueException : RuntimeException |
kotlin.Array |
|
kotlin.BooleanArray |
|
kotlin.collections.Iterable |
|
kotlin.collections.List |
|
kotlin.String |
_rand |
Random number generator used for row sampling. Reassignable to set a seed for deterministic sampling. var _rand: Random |
DEF_NEST_COLUMN_NAME |
const val DEF_NEST_COLUMN_NAME: String |
flightsData |
On-time data for all flights that departed NYC (i.e. JFK, LGA or EWR) in 2013. val flightsData: DataFrame |
irisData |
This famous (Fisher's or Anderson's) iris data set gives the measurements in centimeters of the variables sepal length and width and petal length and width, respectively, for 50 flowers from each of 3 species of iris. The species are Iris setosa, versicolor, and virginica. val irisData: DataFrame |
MISSING_VALUE |
val MISSING_VALUE: String |
PRINT_MAX_DIGITS |
var PRINT_MAX_DIGITS: Int |
PRINT_MAX_ROWS |
var PRINT_MAX_ROWS: Int |
PRINT_MAX_WIDTH |
var PRINT_MAX_WIDTH: Int |
PRINT_ROW_NUMBERS |
var PRINT_ROW_NUMBERS: Boolean |
sleepData |
An example data frame with 83 rows and 11 variables val sleepData: DataFrame |
sleepPatterns |
val sleepPatterns: List<SleepPattern> |
addRowNumber |
Add the row-number as column to a data-frame. fun DataFrame.addRowNumber(name: String = "row_number"): DataFrame |
all |
fun ColNames.all(): List<Boolean> |
asFactor |
fun DataCol.asFactor(): DataCol |
asString |
fun DataFrame.asString(title: String = "A DataFrame", colNames: Boolean = true, maxRows: Int = PRINT_MAX_ROWS, maxWidth: Int = PRINT_MAX_WIDTH, maxDigits: Int = PRINT_MAX_DIGITS, rowNumbers: Boolean = PRINT_ROW_NUMBERS): String |
asType |
fun <R> DataCol.asType(): Array<R?> |
bindCols |
fun bindCols(left: DataFrame, right: DataFrame, renameDuplicates: Boolean = true): DataFrame |
bindRows |
Adds new rows. Missing entries are set to null. The output of bindRows() will contain a column if that column appears in any of the inputs. fun DataFrame.bindRows(df: DataFrame): DataFrame fun DataFrame.bindRows(vararg someRows: DataFrameRow): DataFrame fun bindRows(vararg dataFrames: DataFrame): DataFrame |
columnTypes |
fun columnTypes(df: DataFrame): List<ColSpec> |
complete |
Turns implicit missing values into explicit missing values. This is a wrapper around expand(), dplyr::left_join() and replace_na() that's useful for completing missing combinations of data. fun DataFrame.complete(vararg columnNames: String): DataFrame |
const |
fun ExpressionContext.const(someThing: Any): DataCol |
count |
Counts observations by group. fun DataFrame.count(vararg selects: String, name: String = "n"): DataFrame |
countExpr |
Counts expressions fun DataFrame.countExpr(vararg moreExpressions: TableExpression, name: String = "n", tableExpression: TableExpression? = null): DataFrame |
cumSum |
Calculates the cumulative sum of the column values. fun DataCol.cumSum(): DataCol |
dataFrameOf |
Create a new data frame in place. fun dataFrameOf(vararg header: String): InplaceDataFrameBuilder fun dataFrameOf(header: Iterable<String>): InplaceDataFrameBuilder
Create a new data-frame from a list of columns. fun dataFrameOf(vararg columns: DataCol): DataFrame
Create a new data-frame from a records encoded as key-value maps. fun dataFrameOf(rows: Iterable<DataFrameRow>): DataFrame |
desc |
Creates a sorting attribute that inverts the order of the argument fun DataCol.desc(): IntCol |
distinct |
Retain only unique/distinct rows from an input tbl. fun DataFrame.distinct(vararg selects: String = this.names.toTypedArray()): DataFrame |
emptyDataFrame |
fun emptyDataFrame(): DataFrame |
endsWith |
fun ColNames.endsWith(prefix: String): List<Boolean?> |
eq |
infix fun DataCol.eq(i: Any): BooleanArray |
except |
Performs a negative selection by selecting all columns except the listed ones. fun ColNames.except(vararg columns: String): List<Boolean?> fun ColNames.except(columnSelector: ColumnSelector): List<Boolean?> |
expand |
expand() is often useful in conjunction with left_join if you want to convert implicit missing values to explicit missing values. Or you can use it in conjunction with anti_join() to figure out which combinations are missing. fun DataFrame.expand(vararg columnNames: String): DataFrame |
filter |
Filter the rows of a table with a single predicate. fun DataFrame.filter(predicate: ExpressionContext.(ExpressionContext) -> List<Boolean?>): DataFrame
AND-filter a table with different filters. fun DataFrame.filter(vararg predicates: DataFrame.(DataFrame) -> List<Boolean>): DataFrame |
gather |
Gather takes multiple columns and collapses into key-value pairs, duplicating all other columns as needed. You use gather() when you notice that you have columns that are not variables. fun DataFrame.gather(key: String, value: String, columns: List<String> = this.names, convert: Boolean = false): DataFrame fun DataFrame.gather(key: String, value: String, columns: ColumnSelector, convert: Boolean = false): DataFrame |
ge |
infix fun DataCol.ge(i: Number): BooleanArray infix fun DataCol.ge(i: DataCol): BooleanArray |
greaterEqualsThan |
fun DataCol.greaterEqualsThan(i: Number): BooleanArray fun DataCol.greaterEqualsThan(i: DataCol): BooleanArray |
greaterThan |
fun DataCol.greaterThan(i: Number): BooleanArray fun DataCol.greaterThan(i: DataCol): BooleanArray |
groupBy |
Creates a grouped data-frame from a column selector function. See fun DataFrame.groupBy(columnSelect: ColumnSelector): DataFrame |
groupByExpr |
Creates a grouped data-frame from one or more table expressions. See fun DataFrame.groupByExpr(vararg moreExpressions: TableExpression, tableExpression: TableExpression? = null): DataFrame |
gt |
infix fun DataCol.gt(i: Number): BooleanArray infix fun DataCol.gt(i: DataCol): BooleanArray |
head |
fun DataFrame.head(numRows: Int = 5): DataFrame |
innerJoin |
fun DataFrame.innerJoin(right: DataFrame, by: String, suffices: Pair<String, String> = ".x" to ".y"): DataFrame fun DataFrame.innerJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
isEqualTo |
infix fun DataCol.isEqualTo(i: Any): BooleanArray |
isListOfType |
fun <T> isListOfType(items: List<Any?>): Boolean |
isMatching |
Match a text column in a NA-aware manner to create a predicate vector for filtering. fun <T> DataCol.isMatching(missingAs: Boolean = false, filter: T.() -> Boolean): BooleanArray |
isNA |
Maps a column to true for the NA values and false otherwise. fun DataCol.isNA(): BooleanArray fun ExpressionContext.isNA(columnName: String): BooleanArray |
isNotNA |
fun DataCol.isNotNA(): BooleanArray fun ExpressionContext.isNotNA(columnName: String): BooleanArray |
isOfType |
Tests whether the first non-null element in the list has a specific type by peeking into it from the top. fun <T> isOfType(items: Array<Any?>): Boolean |
join |
fun join(left: DataFrame, right: DataFrame, by: Iterable<String> = defaultBy(left, right), suffices: Pair<String, String> = ".x" to ".y", type: JoinType): DataFrame |
lag |
Returns the "previous" column values. Useful for comparing values behind the current values. fun DataCol.lag(n: Int = 1, default: Any? = null): DataCol |
le |
infix fun DataCol.le(i: Number): BooleanArray infix fun DataCol.le(i: DataCol): BooleanArray |
lead |
Returns the "next" column values. Useful for comparing values ahead of the current values. fun DataCol.lead(n: Int = 1, default: Any? = null): DataCol |
leftJoin |
Convenience wrapper around leftJoin that works with a single by attribute. fun DataFrame.leftJoin(right: DataFrame, by: String, suffices: Pair<String, String> = ".x" to ".y"): DataFrame fun DataFrame.leftJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
lesserEquals |
fun DataCol.lesserEquals(i: Number): BooleanArray fun DataCol.lesserEquals(i: DataCol): BooleanArray |
lesserThan |
fun DataCol.lesserThan(i: Number): BooleanArray fun DataCol.lesserThan(i: DataCol): BooleanArray |
listOf |
fun ColNames.listOf(vararg someNames: String): List<Boolean?> fun ColNames.listOf(someNames: List<String>): List<Boolean?> |
lt |
infix fun DataCol.lt(i: Number): BooleanArray infix fun DataCol.lt(i: DataCol): BooleanArray |
main |
fun main(args: Array<String>): Unit |
map |
Allows to transform column data into list of same length ignoring missing values, which are kept but processing can be done in a non-null manner. fun <T> DataCol.map(expr: (T) -> Any?): List<Any?> |
matches |
fun ColNames.matches(regex: String): List<Boolean?> fun ColNames.matches(regex: Regex): List<Boolean?> |
max |
Calculates the maximum of the column values. fun DataCol.max(removeNA: Boolean = false): Double? |
mean |
Calculates the arithmetic mean of the column values. fun DataCol.mean(removeNA: Boolean = false): Double? |
median |
Calculates the median of the column values. fun DataCol.median(removeNA: Boolean = false): Double? |
min |
Calculates the minimum of the column values. fun DataCol.min(removeNA: Boolean = false): Double? |
moveLeft |
Push some columns to the left end of a data-frame. fun DataFrame.moveLeft(vararg columnNames: String): DataFrame |
moveRight |
Push some columns to the right end of a data-frame. fun DataFrame.moveRight(vararg columnNames: String): DataFrame |
nest |
Nest repeated values in a list-variable. fun DataFrame.nest(colSelect: ColumnSelector = { except(*groupedBy().names.toTypedArray()) }, columnName: String = DEF_NEST_COLUMN_NAME): DataFrame |
order |
fun DataCol.order(naLast: Boolean = true): List<Int> |
outerJoin |
fun DataFrame.outerJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right)): DataFrame |
pctChange |
Calculates the percentage change between the current and a prior column value. fun DataCol.pctChange(): DataCol |
print |
fun DataFrame.print(title: String = "A DataFrame", colNames: Boolean = true, maxRows: Int = PRINT_MAX_ROWS, maxWidth: Int = PRINT_MAX_WIDTH, maxDigits: Int = PRINT_MAX_DIGITS, rowNumbers: Boolean = PRINT_ROW_NUMBERS): Unit |
printDataClassSchema |
Provides code to convert a dataframe to a strongly typed list of kotlin data-class instances. fun DataFrame.printDataClassSchema(dataClassName: String, receiverVarName: String = "dataFrame"): Unit |
range |
fun ColNames.range(from: String, to: String): List<Boolean?> |
rank |
fun DataCol.rank(naLast: Boolean = true): List<Int> |
remove |
Remove columns by column type fun <T : DataCol> DataFrame.remove(): DataFrame |
rename |
fun DataFrame.rename(vararg old2new: Pair<String, String>): DataFrame
Rename one or several columns. Positions should be preserved fun DataFrame.rename(vararg old2new: RenameRule): DataFrame |
rowsAs |
Convert rows into objects by using reflection. Only parameters used in constructor will be mapped. Note: This is tested with kotlin data classes only. File a ticket for better type compatibility or any issues! fun <T> DataFrame.rowsAs(mapping: Map<String, String> = names.map { it to it }.toMap()): Iterable<T> |
rowwise |
Creates a grouped data-frame where each group consists of exactly one line. Thereby the row-number is used as a group-hash. fun DataFrame.rowwise(): DataFrame |
sampleFrac |
Select random rows from a table. If receiver is grouped, sampling is done per group. fun DataFrame.sampleFrac(fraction: Double, replace: Boolean = false): DataFrame |
sampleN |
Select random rows from a table. If receiver is grouped, sampling is done per group. fun DataFrame.sampleN(n: Int, replace: Boolean = false): DataFrame |
schema |
Prints the schema (that is column names, types, and the first few values per column) of a dataframe to stdout. fun DataFrame.schema(maxDigits: Int = 3, maxWidth: Int = PRINT_MAX_WIDTH): Unit |
sd |
Calculates the standard deviation of the column values. fun DataCol.sd(removeNA: Boolean = false): Double? |
select |
Select columns by column type fun <T : DataCol> DataFrame.select(): DataFrame |
semiJoin |
fun DataFrame.semiJoin(right: DataFrame, by: String): DataFrame fun DataFrame.semiJoin(right: DataFrame, by: Iterable<Pair<String, String>>): DataFrame fun DataFrame.semiJoin(right: DataFrame, by: Iterable<String> = defaultBy(this, right), suffices: Pair<String, String> = ".x" to ".y"): DataFrame |
separate |
Given either regular expression or a vector of character positions, separate() turns a single character column into multiple columns. fun DataFrame.separate(column: String, into: List<String>, sep: String = "[^\\w]", remove: Boolean = true, convert: Boolean = false): DataFrame |
setNames |
Replace current column names with new ones. The number of provided names must match the number of columns. fun DataFrame.setNames(vararg newNames: String): DataFrame |
shuffle |
Randomize the row order of a data-frame. fun DataFrame.shuffle(): DataFrame |
slice |
Select rows by position while taking into account grouping in a data-frame. fun DataFrame.slice(vararg slices: Int): DataFrame fun DataFrame.slice(slice: IntRange): DataFrame |
sortedBy |
fun DataFrame.sortedBy(sortExpression: SortExpression): DataFrame fun DataFrame.sortedBy(vararg sortExpressions: SortExpression): DataFrame |
spread |
Spread a key-value pair across multiple columns. fun DataFrame.spread(key: String, value: String, fill: Any? = null, convert: Boolean = false): DataFrame |
startsWith |
fun ColNames.startsWith(prefix: String): List<Boolean?> |
sum |
Calculates the sum of the column values. fun DataCol.sum(removeNA: Boolean = false): Number? |
summarize |
fun DataFrame.summarize(name: String, tableExpression: TableExpression): DataFrame |
summarizeAt |
fun DataFrame.summarizeAt(columnSelect: ColumnSelector, vararg aggfuns: AggFun): DataFrame fun DataFrame.summarizeAt(columnSelect: ColumnSelector, op: (SummarizeBuilder.() -> Unit)? = null): DataFrame |
tail |
fun DataFrame.tail(numRows: Int = 5): DataFrame |
take |
fun DataFrame.take(numRows: Int = 5): DataFrame |
takeLast |
fun DataFrame.takeLast(numRows: Int): DataFrame |
toBooleans |
fun DataCol.toBooleans(): Array<Boolean?> |
toDoubleMatrix |
fun DataFrame.toDoubleMatrix(): Array<DoubleArray> |
toDoubles |
fun DataCol.toDoubles(): Array<Double?> |
toFloatMatrix |
fun DataFrame.toFloatMatrix(): Array<FloatArray> |
toInts |
fun DataCol.toInts(): Array<Int?> |
toLongs |
fun DataCol.toLongs(): Array<Long?> |
toMap |
Expose a view on the data as map from column names to nullable arrays. fun DataFrame.toMap(): Map<String, Array<*>> |
toStrings |
fun DataCol.toStrings(): Array<String?> |
unfold |
fun <T> DataFrame.unfold(columnName: String, properties: List<String> = detectPropertiesByReflection<T>().map { it.name }, keep: Boolean = true): DataFrame |
unite |
Convenience function to paste together multiple columns into one. fun DataFrame.unite(colName: String, which: List<String>, sep: String = "_", remove: Boolean = true): DataFrame fun DataFrame.unite(colName: String, vararg which: ColNames.() -> List<Boolean?>, sep: String = "_", remove: Boolean = true): DataFrame |
unnest |
If you have a list-column, this makes each element of the list its own row. It unfolds data vertically. unnest() can handle list-columns that contain atomic vectors, lists, or data frames (but not a mixture of the different types). fun DataFrame.unnest(columnName: String): DataFrame |
writeCSV |
fun DataFrame.writeCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT.withHeader(*names.toTypedArray())): Unit |
writeExcel |
fun DataFrame.writeExcel(filePath: String, sheetName: String, headers: Boolean = true, eraseFile: Boolean = false, boldHeaders: Boolean = true): Unit |
writeTSV |
fun DataFrame.writeTSV(file: File, format: CSVFormat = CSVFormat.TDF.withHeader(*names.toTypedArray())): Unit |
builder |
Create a new data frame in place. fun DataFrame.Companion.builder(vararg header: String): InplaceDataFrameBuilder |
fromJson |
fun DataFrame.Companion.fromJson(file: File): DataFrame fun DataFrame.Companion.fromJson(fileOrUrl: String): DataFrame fun DataFrame.Companion.fromJson(url: URL): DataFrame |
fromJsonString |
fun DataFrame.Companion.fromJsonString(jsonData: String): DataFrame |
fromRecords |
Create a data-frame from a list of objects fun <T> DataFrame.Companion.fromRecords(records: Iterable<T>, mapping: (T) -> DataFrameRow): DataFrame |
fromResultSet |
fun DataFrame.Companion.fromResultSet(rs: ResultSet): DataFrame |
readCSV |
fun DataFrame.Companion.readCSV(fileOrUrl: String, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readCSV(file: File, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame |
readDelim |
fun DataFrame.Companion.readDelim(uri: URI, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), isCompressed: Boolean = uri.toURL().toString().endsWith(".gz"), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readDelim(inStream: InputStream, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), isCompressed: Boolean = false, colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readDelim(reader: Reader, format: CSVFormat = CSVFormat.DEFAULT.withHeader(), colTypes: Map<String, ColType> = mapOf(), skip: Int = 0): DataFrame |
readExcel |
Returns a DataFrame with the contents from an Excel file sheet fun DataFrame.Companion.readExcel(path: String, sheet: Int = 0, cellRange: CellRangeAddress? = null, colTypes: Map<String, ColType> = mapOf(), trim_ws: Boolean = false, guessMax: Int = 100, na: String = MISSING_VALUE, stopAtBlankLine: Boolean = true, includeBlankLines: Boolean = false): DataFrame fun DataFrame.Companion.readExcel(path: String, sheet: String, cellRange: CellRangeAddress? = null, colTypes: Map<String, ColType> = mapOf(), trim_ws: Boolean = false, guessMax: Int = 100, na: String = MISSING_VALUE, stopAtBlankLine: Boolean = true, includeBlankLines: Boolean = false): DataFrame |
readTSV |
fun DataFrame.Companion.readTSV(fileOrUrl: String, format: CSVFormat = CSVFormat.TDF.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame fun DataFrame.Companion.readTSV(file: File, format: CSVFormat = CSVFormat.TDF.withHeader(), colTypes: Map<String, ColType> = mapOf()): DataFrame |