class CleanMissingData extends Estimator[CleanMissingDataModel] with HasInputCols with HasOutputCols with Wrappable with DefaultParamsWritable with SynapseMLLogging
Cleans missing values in the input dataset by replacing them with an imputed value.
The following modes are supported:
Mean - replaces missing values with the mean of the fit column
Median - replaces missing values with the approximate median of the fit column
Custom - replaces missing values with a custom value specified by the user
For mean and median modes, only numeric column types are supported, specifically:
Int, Long, Float, Double
For custom mode, the types above are supported, and additionally:
String, Boolean
- Alphabetic
- By Inheritance
- CleanMissingData
- SynapseMLLogging
- DefaultParamsWritable
- MLWritable
- Wrappable
- RWrappable
- PythonWrappable
- BaseWrappable
- HasOutputCols
- HasInputCols
- Estimator
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
- val cleaningMode: Param[String]
-
final
def
clear(param: Param[_]): CleanMissingData.this.type
- Definition Classes
- Params
-
def
copy(extra: ParamMap): Estimator[CleanMissingDataModel]
- Definition Classes
- CleanMissingData → Estimator → PipelineStage → Params
-
val
customValue: Param[String]
Custom value for imputation; supports numeric, string and boolean types. Date and Timestamp are currently not supported.
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
def
fit(dataset: Dataset[_]): CleanMissingDataModel
Fits the dataset and prepares the transformation function.
- dataset
The input dataset.
- returns
The model for replacing missing values.
- Definition Classes
- CleanMissingData → Estimator
-
def
fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[CleanMissingDataModel]
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], paramMap: ParamMap): CleanMissingDataModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): CleanMissingDataModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" ) @varargs()
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
- def getCleaningMode: String
- def getCustomValue: String
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getInputCols: Array[String]
- Definition Classes
- HasInputCols
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getOutputCols: Array[String]
- Definition Classes
- HasOutputCols
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getParamInfo(p: Param[_]): ParamInfo[_]
- Definition Classes
- BaseWrappable
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
val
inputCols: StringArrayParam
The names of the input columns
- Definition Classes
- HasInputCols
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
logClass(featureName: String): Unit
- Definition Classes
- SynapseMLLogging
-
def
logFit[T](f: ⇒ T, columns: Int): T
- Definition Classes
- SynapseMLLogging
-
def
logTransform[T](f: ⇒ T, columns: Int): T
- Definition Classes
- SynapseMLLogging
-
def
logVerb[T](verb: String, f: ⇒ T, columns: Option[Int] = None): T
- Definition Classes
- SynapseMLLogging
-
def
makePyFile(conf: CodegenConfig): Unit
- Definition Classes
- PythonWrappable
-
def
makeRFile(conf: CodegenConfig): Unit
- Definition Classes
- RWrappable
-
val
outputCols: StringArrayParam
The names of the output columns
- Definition Classes
- HasOutputCols
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
pyAdditionalMethods: String
- Definition Classes
- PythonWrappable
-
def
pyInitFunc(): String
- Definition Classes
- PythonWrappable
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
final
def
set[T](param: Param[T], value: T): CleanMissingData.this.type
- Definition Classes
- Params
- def setCleaningMode(value: String): CleanMissingData.this.type
- def setCustomValue(value: String): CleanMissingData.this.type
-
def
setInputCols(value: Array[String]): CleanMissingData.this.type
- Definition Classes
- HasInputCols
-
def
setOutputCols(value: Array[String]): CleanMissingData.this.type
- Definition Classes
- HasOutputCols
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
def
transformSchema(schema: StructType): StructType
- Definition Classes
- CleanMissingData → PipelineStage
- Annotations
- @DeveloperApi()
-
val
uid: String
- Definition Classes
- CleanMissingData → SynapseMLLogging → Identifiable
-
def
write: MLWriter
- Definition Classes
- DefaultParamsWritable → MLWritable