c
com.microsoft.azure.synapse.ml.exploratory
DistributionBalanceMeasure
Companion object DistributionBalanceMeasure
class DistributionBalanceMeasure extends Transformer with DataBalanceParams with ComplexParamsWritable with Wrappable with SynapseMLLogging
This transformer computes data balance measures based on a reference distribution. For now, we only support a uniform reference distribution.
The output is a dataframe that contains two columns:
- The sensitive feature name.
- A struct containing measure names and their values, showing the differences between the observed and reference distributions. The following measures are computed:
- Kullback-Leibler Divergence - https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
- Jensen-Shannon Distance - https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence
- Wasserstein Distance - https://en.wikipedia.org/wiki/Wasserstein_metric
- Infinity Norm Distance - https://en.wikipedia.org/wiki/Chebyshev_distance
- Total Variation Distance - https://en.wikipedia.org/wiki/Total_variation_distance_of_probability_measures
- Chi-Squared Test - https://en.wikipedia.org/wiki/Chi-squared_test
The output dataframe contains one row for each sensitive feature.
- Annotations
- @Experimental()
Linear Supertypes
Ordering
- Alphabetic
- By Inheritance
Inherited
- DistributionBalanceMeasure
- SynapseMLLogging
- Wrappable
- DotnetWrappable
- RWrappable
- PythonWrappable
- BaseWrappable
- ComplexParamsWritable
- MLWritable
- DataBalanceParams
- HasOutputCol
- Transformer
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- All
Instance Constructors
Value Members
-
final
def
clear(param: Param[_]): DistributionBalanceMeasure.this.type
- Definition Classes
- Params
-
def
copy(extra: ParamMap): Transformer
- Definition Classes
- DistributionBalanceMeasure → Transformer → PipelineStage → Params
-
def
dotnetAdditionalMethods: String
- Definition Classes
- DotnetWrappable
- val emptyReferenceDistribution: Array[Map[String, Double]]
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
- val featureNameCol: Param[String]
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
- def getFeatureNameCol: String
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
final
def
getOutputCol: String
- Definition Classes
- HasOutputCol
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getParamInfo(p: Param[_]): ParamInfo[_]
- Definition Classes
- BaseWrappable
- def getReferenceDistribution: Array[Map[String, Double]]
-
def
getSensitiveCols: Array[String]
- Definition Classes
- DataBalanceParams
-
def
getVerbose: Boolean
- Definition Classes
- DataBalanceParams
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
logClass(featureName: String): Unit
- Definition Classes
- SynapseMLLogging
-
def
logFit[T](f: ⇒ T, columns: Int): T
- Definition Classes
- SynapseMLLogging
-
def
logTransform[T](f: ⇒ T, columns: Int): T
- Definition Classes
- SynapseMLLogging
-
def
logVerb[T](verb: String, f: ⇒ T, columns: Option[Int] = None): T
- Definition Classes
- SynapseMLLogging
-
def
makeDotnetFile(conf: CodegenConfig): Unit
- Definition Classes
- DotnetWrappable
-
def
makePyFile(conf: CodegenConfig): Unit
- Definition Classes
- PythonWrappable
-
def
makeRFile(conf: CodegenConfig): Unit
- Definition Classes
- RWrappable
-
final
val
outputCol: Param[String]
- Definition Classes
- HasOutputCol
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
pyAdditionalMethods: String
- Definition Classes
- PythonWrappable
-
def
pyInitFunc(): String
- Definition Classes
- PythonWrappable
- val referenceDistribution: ArrayMapParam
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
val
sensitiveCols: StringArrayParam
- Definition Classes
- DataBalanceParams
-
final
def
set[T](param: Param[T], value: T): DistributionBalanceMeasure.this.type
- Definition Classes
- Params
- def setFeatureNameCol(value: String): DistributionBalanceMeasure.this.type
-
def
setOutputCol(value: String): DistributionBalanceMeasure.this.type
- Definition Classes
- DataBalanceParams
- def setReferenceDistribution(value: ArrayList[HashMap[String, Double]]): DistributionBalanceMeasure.this.type
- def setReferenceDistribution(value: Array[Map[String, Double]]): DistributionBalanceMeasure.this.type
-
def
setSensitiveCols(values: Array[String]): DistributionBalanceMeasure.this.type
- Definition Classes
- DataBalanceParams
-
def
setVerbose(value: Boolean): DistributionBalanceMeasure.this.type
- Definition Classes
- DataBalanceParams
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
def
transform(dataset: Dataset[_]): DataFrame
- Definition Classes
- DistributionBalanceMeasure → Transformer
-
def
transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" )
-
def
transform(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" ) @varargs()
-
def
transformSchema(schema: StructType): StructType
- Definition Classes
- DistributionBalanceMeasure → PipelineStage
-
val
uid: String
- Definition Classes
- DistributionBalanceMeasure → SynapseMLLogging → Identifiable
-
def
validateSchema(schema: StructType): Unit
- Definition Classes
- DistributionBalanceMeasure → DataBalanceParams
-
val
verbose: BooleanParam
- Definition Classes
- DataBalanceParams
-
def
write: MLWriter
- Definition Classes
- ComplexParamsWritable → MLWritable