TextFeaturizer

Instance Constructors

new TextFeaturizer()
new TextFeaturizer(uid: String)

uid
The id of the module

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def additionalPythonMethods(): String

Definition Classes
Wrappable
final def asInstanceOf[T0]: T0

Definition Classes
Any
val binary: BooleanParam

All nonzero word counts are set to 1 when set to true
All nonzero word counts are set to 1 when set to true

Definition Classes
TextFeaturizerParams
val caseSensitiveStopWords: BooleanParam

Indicates whether a case sensitive comparison is performed on stop words.
Indicates whether a case sensitive comparison is performed on stop words.

Definition Classes
TextFeaturizerParams
final def clear(param: Param[_]): TextFeaturizer.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( ... )
def copy(extra: ParamMap): Estimator[TextFeaturizerModel]

Definition Classes
TextFeaturizer → Estimator → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val defaultStopWordLanguage: Param[String]

Specify the language to use for stop word removal.
Specify the language to use for stop word removal. Use the custom setting when using the stopWords input.

Definition Classes
TextFeaturizerParams
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Definition Classes
Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
def finalize(): Unit

Attributes
protected[java.lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
def fit(dataset: Dataset[_]): TextFeaturizerModel

Definition Classes
TextFeaturizer → Estimator
def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[TextFeaturizerModel]

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], paramMap: ParamMap): TextFeaturizerModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): TextFeaturizerModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" ) @varargs()
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
final def getBinary: Boolean

Definition Classes
TextFeaturizerParams
final def getCaseSensitiveStopWords: Boolean

Definition Classes
TextFeaturizerParams
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
final def getDefaultStopWordLanguage: String

Definition Classes
TextFeaturizerParams
def getInputCol: String

Definition Classes
HasInputCol
final def getMinDocFreq: Int

Definition Classes
TextFeaturizerParams
final def getMinTokenLength: Int

Definition Classes
TextFeaturizerParams
final def getNGramLength: Int

Definition Classes
TextFeaturizerParams
final def getNumFeatures: Int

Definition Classes
TextFeaturizerParams
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
def getOutputCol: String

Definition Classes
HasOutputCol
def getParam(paramName: String): Param[Any]

Definition Classes
Params
final def getStopWords: String

Definition Classes
TextFeaturizerParams
final def getToLowercase: Boolean

Definition Classes
TextFeaturizerParams
final def getTokenizerGaps: Boolean

Definition Classes
TextFeaturizerParams
final def getTokenizerPattern: String

Definition Classes
TextFeaturizerParams
final def getUseIDF: Boolean

Definition Classes
TextFeaturizerParams
final def getUseNGram: Boolean

Definition Classes
TextFeaturizerParams
final def getUseStopWordsRemover: Boolean

Definition Classes
TextFeaturizerParams
final def getUseTokenizer: Boolean

Definition Classes
TextFeaturizerParams
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hashCode(): Int

Definition Classes
AnyRef → Any
def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes
protected
Definition Classes
Logging
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes
protected
Definition Classes
Logging
val inputCol: Param[String]

The name of the input column
The name of the input column

Definition Classes
HasInputCol
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val minDocFreq: IntParam

Minimum number of documents in which a term should appear.
Minimum number of documents in which a term should appear.

Definition Classes
TextFeaturizerParams
val minTokenLength: IntParam

Minimum token length; must be 0 or greater.
Minimum token length; must be 0 or greater.

Definition Classes
TextFeaturizerParams
val nGramLength: IntParam

The size of the Ngrams
The size of the Ngrams

Definition Classes
TextFeaturizerParams
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
final def notifyAll(): Unit

Definition Classes
AnyRef
val numFeatures: IntParam

Set the number of features to hash each document to
Set the number of features to hash each document to

Definition Classes
TextFeaturizerParams
val outputCol: Param[String]

The name of the output column
The name of the output column

Definition Classes
HasOutputCol
lazy val params: Array[Param[_]]

Definition Classes
Params
def save(path: String): Unit

Definition Classes
MLWritable
Annotations
@Since( "1.6.0" ) @throws( ... )
final def set(paramPair: ParamPair[_]): TextFeaturizer.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): TextFeaturizer.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): TextFeaturizer.this.type

Definition Classes
Params
def setBinary(value: Boolean): TextFeaturizer.this.type
def setCaseSensitiveStopWords(value: Boolean): TextFeaturizer.this.type
final def setDefault(paramPairs: ParamPair[_]*): TextFeaturizer.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): TextFeaturizer.this.type

Attributes
protected
Definition Classes
Params
def setDefaultStopWordLanguage(value: String): TextFeaturizer.this.type
def setInputCol(value: String): TextFeaturizer.this.type

Definition Classes
HasInputCol
def setMinDocFreq(value: Int): TextFeaturizer.this.type
def setMinTokenLength(value: Int): TextFeaturizer.this.type
def setNGramLength(value: Int): TextFeaturizer.this.type
def setNumFeatures(value: Int): TextFeaturizer.this.type
def setOutputCol(value: String): TextFeaturizer.this.type

Definition Classes
HasOutputCol
def setStopWords(value: String): TextFeaturizer.this.type
def setToLowercase(value: Boolean): TextFeaturizer.this.type
def setTokenizerGaps(value: Boolean): TextFeaturizer.this.type
def setTokenizerPattern(value: String): TextFeaturizer.this.type
def setUseIDF(value: Boolean): TextFeaturizer.this.type
def setUseNGram(value: Boolean): TextFeaturizer.this.type
def setUseStopWordsRemover(value: Boolean): TextFeaturizer.this.type
def setUseTokenizer(value: Boolean): TextFeaturizer.this.type
val stopWords: Param[String]

The words to be filtered out.
The words to be filtered out. This is a comma-separated list of words, encoded as a single string. For example: "a, the, and".

Definition Classes
TextFeaturizerParams
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val toLowercase: BooleanParam

Indicates whether to convert all characters to lowercase before tokenizing.
Indicates whether to convert all characters to lowercase before tokenizing.

Definition Classes
TextFeaturizerParams
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
val tokenizerGaps: BooleanParam

Indicates whether the regex splits on gaps (true) or matches tokens (false)
Indicates whether the regex splits on gaps (true) or matches tokens (false)

Definition Classes
TextFeaturizerParams
val tokenizerPattern: Param[String]

Regex pattern used to match delimiters if gaps (true) or tokens (false)
Regex pattern used to match delimiters if gaps (true) or tokens (false)

Definition Classes
TextFeaturizerParams
def transformSchema(schema: StructType): StructType

Definition Classes
TextFeaturizer → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes
protected
Definition Classes
PipelineStage
Annotations
@DeveloperApi()
val uid: String

The id of the module
The id of the module

Definition Classes
TextFeaturizer → Identifiable
val useIDF: BooleanParam

Scale the Term Frequencies by IDF when set to true
Scale the Term Frequencies by IDF when set to true

Definition Classes
TextFeaturizerParams
val useNGram: BooleanParam

Enumerate N grams when set
Enumerate N grams when set

Definition Classes
TextFeaturizerParams
val useStopWordsRemover: BooleanParam

Indicates whether to remove stop words from tokenized data.
Indicates whether to remove stop words from tokenized data.

Definition Classes
TextFeaturizerParams
val useTokenizer: BooleanParam

Tokenize the input when set to true
Tokenize the input when set to true

Definition Classes
TextFeaturizerParams
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
def write: MLWriter

Definition Classes
DefaultParamsWritable → MLWritable

Related Docs: object TextFeaturizer | package text

class TextFeaturizer extends Estimator[TextFeaturizerModel] with TextFeaturizerParams with HasInputCol with HasOutputCol

Instance Constructors

new TextFeaturizer()

new TextFeaturizer(uid: String)

Value Members

final def !=(arg0: Any): Boolean

final def ##(): Int

final def $[T](param: Param[T]): T

final def ==(arg0: Any): Boolean

def additionalPythonMethods(): String

final def asInstanceOf[T0]: T0

val binary: BooleanParam

val caseSensitiveStopWords: BooleanParam

final def clear(param: Param[_]): TextFeaturizer.this.type

def clone(): AnyRef

def copy(extra: ParamMap): Estimator[TextFeaturizerModel]

def copyValues[T <: Params](to: T, extra: ParamMap): T

final def defaultCopy[T <: Params](extra: ParamMap): T

val defaultStopWordLanguage: Param[String]

final def eq(arg0: AnyRef): Boolean

def equals(arg0: Any): Boolean

def explainParam(param: Param[_]): String

def explainParams(): String

final def extractParamMap(): ParamMap

final def extractParamMap(extra: ParamMap): ParamMap

def finalize(): Unit

def fit(dataset: Dataset[_]): TextFeaturizerModel

def fit(dataset: Dataset[_], paramMaps: Array[ParamMap]): Seq[TextFeaturizerModel]

def fit(dataset: Dataset[_], paramMap: ParamMap): TextFeaturizerModel

def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): TextFeaturizerModel

final def get[T](param: Param[T]): Option[T]

final def getBinary: Boolean

final def getCaseSensitiveStopWords: Boolean

final def getClass(): Class[_]

final def getDefault[T](param: Param[T]): Option[T]

final def getDefaultStopWordLanguage: String

def getInputCol: String

final def getMinDocFreq: Int

final def getMinTokenLength: Int

final def getNGramLength: Int

final def getNumFeatures: Int

final def getOrDefault[T](param: Param[T]): T

def getOutputCol: String

def getParam(paramName: String): Param[Any]

final def getStopWords: String

final def getToLowercase: Boolean

final def getTokenizerGaps: Boolean

final def getTokenizerPattern: String

final def getUseIDF: Boolean

final def getUseNGram: Boolean

final def getUseStopWordsRemover: Boolean

final def getUseTokenizer: Boolean

final def hasDefault[T](param: Param[T]): Boolean

def hasParam(paramName: String): Boolean

def hashCode(): Int

def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

def initializeLogIfNecessary(isInterpreter: Boolean): Unit

val inputCol: Param[String]

final def isDefined(param: Param[_]): Boolean

final def isInstanceOf[T0]: Boolean

final def isSet(param: Param[_]): Boolean

def isTraceEnabled(): Boolean

def log: Logger

def logDebug(msg: ⇒ String, throwable: Throwable): Unit

def logDebug(msg: ⇒ String): Unit

def logError(msg: ⇒ String, throwable: Throwable): Unit

def logError(msg: ⇒ String): Unit

def logInfo(msg: ⇒ String, throwable: Throwable): Unit

def logInfo(msg: ⇒ String): Unit

def logName: String

def logTrace(msg: ⇒ String, throwable: Throwable): Unit

def logTrace(msg: ⇒ String): Unit

def logWarning(msg: ⇒ String, throwable: Throwable): Unit

def logWarning(msg: ⇒ String): Unit

val minDocFreq: IntParam

val minTokenLength: IntParam

val nGramLength: IntParam

final def ne(arg0: AnyRef): Boolean