class TextFeaturizer extends Estimator[PipelineModel] with TextFeaturizerParams with HasInputCol with HasOutputCol with BasicLogging
Featurize text.
- Alphabetic
- By Inheritance
- TextFeaturizer
- BasicLogging
- HasOutputCol
- HasInputCol
- TextFeaturizerParams
- DefaultParamsWritable
- MLWritable
- Wrappable
- RWrappable
- PythonWrappable
- BaseWrappable
- Estimator
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
$[T](param: Param[T]): T
- Attributes
- protected
- Definition Classes
- Params
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
val
binary: BooleanParam
All nonzero word counts are set to 1 when set to true
All nonzero word counts are set to 1 when set to true
- Definition Classes
- TextFeaturizerParams
-
val
caseSensitiveStopWords: BooleanParam
Indicates whether a case sensitive comparison is performed on stop words.
Indicates whether a case sensitive comparison is performed on stop words.
- Definition Classes
- TextFeaturizerParams
-
lazy val
classNameHelper: String
- Attributes
- protected
- Definition Classes
- BaseWrappable
-
final
def
clear(param: Param[_]): TextFeaturizer.this.type
- Definition Classes
- Params
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
companionModelClassName: String
- Attributes
- protected
- Definition Classes
- BaseWrappable
-
def
copy(extra: ParamMap): TextFeaturizer.this.type
- Definition Classes
- TextFeaturizer → Estimator → PipelineStage → Params
-
def
copyValues[T <: Params](to: T, extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
lazy val
copyrightLines: String
- Attributes
- protected
- Definition Classes
- BaseWrappable
-
final
def
defaultCopy[T <: Params](extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
defaultStopWordLanguage: Param[String]
Specify the language to use for stop word removal.
Specify the language to use for stop word removal. Use the custom setting when using the stopWords input.
- Definition Classes
- TextFeaturizerParams
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
fit(dataset: Dataset[_]): PipelineModel
- Definition Classes
- TextFeaturizer → Estimator
-
def
fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[PipelineModel]
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], paramMap: ParamMap): PipelineModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): PipelineModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" ) @varargs()
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getBinary: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getCaseSensitiveStopWords: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getDefaultStopWordLanguage: String
- Definition Classes
- TextFeaturizerParams
-
def
getInputCol: String
- Definition Classes
- HasInputCol
-
final
def
getMinDocFreq: Int
- Definition Classes
- TextFeaturizerParams
-
final
def
getMinTokenLength: Int
- Definition Classes
- TextFeaturizerParams
-
final
def
getNGramLength: Int
- Definition Classes
- TextFeaturizerParams
-
final
def
getNumFeatures: Int
- Definition Classes
- TextFeaturizerParams
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getOutputCol: String
- Definition Classes
- HasOutputCol
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
final
def
getStopWords: String
- Definition Classes
- TextFeaturizerParams
-
final
def
getToLowercase: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getTokenizerGaps: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getTokenizerPattern: String
- Definition Classes
- TextFeaturizerParams
-
final
def
getUseIDF: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getUseNGram: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getUseStopWordsRemover: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
getUseTokenizer: Boolean
- Definition Classes
- TextFeaturizerParams
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
inputCol: Param[String]
The name of the input column
The name of the input column
- Definition Classes
- HasInputCol
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logBase(methodName: String): Unit
- Attributes
- protected
- Definition Classes
- BasicLogging
-
def
logClass(): Unit
- Definition Classes
- BasicLogging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logErrorBase(methodName: String, e: Exception): Unit
- Attributes
- protected
- Definition Classes
- BasicLogging
-
def
logFit[T](f: ⇒ T): T
- Definition Classes
- BasicLogging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logPredict[T](f: ⇒ T): T
- Definition Classes
- BasicLogging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrain[T](f: ⇒ T): T
- Definition Classes
- BasicLogging
-
def
logTransform[T](f: ⇒ T): T
- Definition Classes
- BasicLogging
-
def
logVerb[T](verb: String, f: ⇒ T): T
- Definition Classes
- BasicLogging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
makePyFile(conf: CodegenConfig): Unit
- Definition Classes
- PythonWrappable
-
def
makeRFile(conf: CodegenConfig): Unit
- Definition Classes
- RWrappable
-
val
minDocFreq: IntParam
Minimum number of documents in which a term should appear.
Minimum number of documents in which a term should appear.
- Definition Classes
- TextFeaturizerParams
-
val
minTokenLength: IntParam
Minimum token length; must be 0 or greater.
Minimum token length; must be 0 or greater.
- Definition Classes
- TextFeaturizerParams
-
val
nGramLength: IntParam
The size of the n-grams
The size of the n-grams
- Definition Classes
- TextFeaturizerParams
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
val
numFeatures: IntParam
Set the number of features to hash each document to
Set the number of features to hash each document to
- Definition Classes
- TextFeaturizerParams
-
val
outputCol: Param[String]
The name of the output column
The name of the output column
- Definition Classes
- HasOutputCol
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
pyAdditionalMethods: String
- Definition Classes
- PythonWrappable
-
lazy val
pyClassDoc: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
lazy val
pyClassName: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyExtraEstimatorImports: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyExtraEstimatorMethods: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
lazy val
pyInheritedClasses: Seq[String]
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyInitFunc(): String
- Definition Classes
- PythonWrappable
-
lazy val
pyInternalWrapper: Boolean
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
lazy val
pyObjectBaseClass: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamArg[T](p: Param[T]): String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamDefault[T](p: Param[T]): Option[String]
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamGetter(p: Param[_]): String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamSetter(p: Param[_]): String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamsArgs: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamsDefaults: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
lazy val
pyParamsDefinitions: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamsGetters: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pyParamsSetters: String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
pythonClass(): String
- Attributes
- protected
- Definition Classes
- PythonWrappable
-
def
rClass(): String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rDocString: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rExtraBodyLines: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rExtraInitLines: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
lazy val
rFuncName: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
lazy val
rInternalWrapper: Boolean
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rParamArg[T](p: Param[T]): String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rParamsArgs: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
rSetterLines: String
- Attributes
- protected
- Definition Classes
- RWrappable
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
final
def
set(paramPair: ParamPair[_]): TextFeaturizer.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set(param: String, value: Any): TextFeaturizer.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set[T](param: Param[T], value: T): TextFeaturizer.this.type
- Definition Classes
- Params
- def setBinary(value: Boolean): TextFeaturizer.this.type
- def setCaseSensitiveStopWords(value: Boolean): TextFeaturizer.this.type
-
final
def
setDefault(paramPairs: ParamPair[_]*): TextFeaturizer.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
setDefault[T](param: Param[T], value: T): TextFeaturizer.this.type
- Attributes
- protected
- Definition Classes
- Params
- def setDefaultStopWordLanguage(value: String): TextFeaturizer.this.type
-
def
setInputCol(value: String): TextFeaturizer.this.type
- Definition Classes
- HasInputCol
- def setMinDocFreq(value: Int): TextFeaturizer.this.type
- def setMinTokenLength(value: Int): TextFeaturizer.this.type
- def setNGramLength(value: Int): TextFeaturizer.this.type
- def setNumFeatures(value: Int): TextFeaturizer.this.type
-
def
setOutputCol(value: String): TextFeaturizer.this.type
- Definition Classes
- HasOutputCol
- def setStopWords(value: String): TextFeaturizer.this.type
- def setToLowercase(value: Boolean): TextFeaturizer.this.type
- def setTokenizerGaps(value: Boolean): TextFeaturizer.this.type
- def setTokenizerPattern(value: String): TextFeaturizer.this.type
- def setUseIDF(value: Boolean): TextFeaturizer.this.type
- def setUseNGram(value: Boolean): TextFeaturizer.this.type
- def setUseStopWordsRemover(value: Boolean): TextFeaturizer.this.type
- def setUseTokenizer(value: Boolean): TextFeaturizer.this.type
-
val
stopWords: Param[String]
The words to be filtered out.
The words to be filtered out. This is a comma separated list of words, encoded as a single string. For example, "a, the, and"
- Definition Classes
- TextFeaturizerParams
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
val
toLowercase: BooleanParam
Indicates whether to convert all characters to lowercase before tokenizing.
Indicates whether to convert all characters to lowercase before tokenizing.
- Definition Classes
- TextFeaturizerParams
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
val
tokenizerGaps: BooleanParam
Indicates whether the regex splits on gaps (true) or matches tokens (false)
Indicates whether the regex splits on gaps (true) or matches tokens (false)
- Definition Classes
- TextFeaturizerParams
-
val
tokenizerPattern: Param[String]
Regex pattern used to match delimiters if gaps (true) or tokens (false)
Regex pattern used to match delimiters if gaps (true) or tokens (false)
- Definition Classes
- TextFeaturizerParams
-
def
transformSchema(schema: StructType): StructType
- Definition Classes
- TextFeaturizer → PipelineStage
-
def
transformSchema(schema: StructType, logging: Boolean): StructType
- Attributes
- protected
- Definition Classes
- PipelineStage
- Annotations
- @DeveloperApi()
-
val
uid: String
- Definition Classes
- TextFeaturizer → BasicLogging → Identifiable
-
val
useIDF: BooleanParam
Scale the Term Frequencies by IDF when set to true
Scale the Term Frequencies by IDF when set to true
- Definition Classes
- TextFeaturizerParams
-
val
useNGram: BooleanParam
Enumerate n-grams when set to true
Enumerate n-grams when set to true
- Definition Classes
- TextFeaturizerParams
-
val
useStopWordsRemover: BooleanParam
Indicates whether to remove stop words from tokenized data.
Indicates whether to remove stop words from tokenized data.
- Definition Classes
- TextFeaturizerParams
-
val
useTokenizer: BooleanParam
Tokenize the input when set to true
Tokenize the input when set to true
- Definition Classes
- TextFeaturizerParams
-
val
ver: String
- Definition Classes
- BasicLogging
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
write: MLWriter
- Definition Classes
- DefaultParamsWritable → MLWritable