trait TextFeaturizerParams extends Wrappable with DefaultParamsWritable
- Alphabetic
- By Inheritance
- TextFeaturizerParams
- DefaultParamsWritable
- MLWritable
- Wrappable
- DotnetWrappable
- RWrappable
- PythonWrappable
- BaseWrappable
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Abstract Value Members
Concrete Value Members
-
val
binary: BooleanParam
All nonzero word counts are set to 1 when set to true
-
val
caseSensitiveStopWords: BooleanParam
Indicates whether a case sensitive comparison is performed on stop words.
-
final
def
clear(param: Param[_]): TextFeaturizerParams.this.type
- Definition Classes
- Params
-
val
defaultStopWordLanguage: Param[String]
Specify the language to use for stop word removal.
Specify the language to use for stop word removal. Use the custom setting when using the stopWords input.
-
def
dotnetAdditionalMethods: String
- Definition Classes
- DotnetWrappable
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
- final def getBinary: Boolean
- final def getCaseSensitiveStopWords: Boolean
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
- final def getDefaultStopWordLanguage: String
- final def getMinDocFreq: Int
- final def getMinTokenLength: Int
- final def getNGramLength: Int
- final def getNumFeatures: Int
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getParamInfo(p: Param[_]): ParamInfo[_]
- Definition Classes
- BaseWrappable
- final def getStopWords: String
- final def getToLowercase: Boolean
- final def getTokenizerGaps: Boolean
- final def getTokenizerPattern: String
- final def getUseIDF: Boolean
- final def getUseNGram: Boolean
- final def getUseStopWordsRemover: Boolean
- final def getUseTokenizer: Boolean
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
makeDotnetFile(conf: CodegenConfig): Unit
- Definition Classes
- DotnetWrappable
-
def
makePyFile(conf: CodegenConfig): Unit
- Definition Classes
- PythonWrappable
-
def
makeRFile(conf: CodegenConfig): Unit
- Definition Classes
- RWrappable
-
val
minDocFreq: IntParam
Minimum number of documents in which a term should appear.
-
val
minTokenLength: IntParam
Minimum token length; must be 0 or greater.
-
val
nGramLength: IntParam
The size of the Ngrams
-
val
numFeatures: IntParam
Set the number of features to hash each document to
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
pyAdditionalMethods: String
- Definition Classes
- PythonWrappable
-
def
pyInitFunc(): String
- Definition Classes
- PythonWrappable
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
final
def
set[T](param: Param[T], value: T): TextFeaturizerParams.this.type
- Definition Classes
- Params
-
val
stopWords: Param[String]
The words to be filtered out.
The words to be filtered out. This is a comma separated list of words, encoded as a single string. For example, "a, the, and"
-
val
toLowercase: BooleanParam
Indicates whether to convert all characters to lowercase before tokenizing.
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
val
tokenizerGaps: BooleanParam
Indicates whether the regex splits on gaps (true) or matches tokens (false)
-
val
tokenizerPattern: Param[String]
Regex pattern used to match delimiters if gaps (true) or tokens (false)
-
val
useIDF: BooleanParam
Scale the Term Frequencies by IDF when set to true
-
val
useNGram: BooleanParam
Enumerate n-grams when set to true
-
val
useStopWordsRemover: BooleanParam
Indicates whether to remove stop words from tokenized data.
-
val
useTokenizer: BooleanParam
Tokenize the input when set to true
-
def
write: MLWriter
- Definition Classes
- DefaultParamsWritable → MLWritable