Packages

class PageSplitter extends Transformer with HasInputCol with HasOutputCol with Wrappable with DefaultParamsWritable with BasicLogging

Splits text into chunks of at most n characters, where n is the value of the maximumPageLength parameter.

Linear Supertypes
BasicLogging, DefaultParamsWritable, MLWritable, Wrappable, RWrappable, PythonWrappable, BaseWrappable, HasOutputCol, HasInputCol, Transformer, PipelineStage, Logging, Params, Serializable, Serializable, Identifiable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. PageSplitter
  2. BasicLogging
  3. DefaultParamsWritable
  4. MLWritable
  5. Wrappable
  6. RWrappable
  7. PythonWrappable
  8. BaseWrappable
  9. HasOutputCol
  10. HasInputCol
  11. Transformer
  12. PipelineStage
  13. Logging
  14. Params
  15. Serializable
  16. Serializable
  17. Identifiable
  18. AnyRef
  19. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new PageSplitter()
  2. new PageSplitter(uid: String)

    uid

    The id of the module

Value Members

  1. val boundaryRegex: Param[String]
  2. final def clear(param: Param[_]): PageSplitter.this.type
    Definition Classes
    Params
  3. def copy(extra: ParamMap): MultiNGram
    Definition Classes
    PageSplitter → Transformer → PipelineStage → Params
  4. def explainParam(param: Param[_]): String
    Definition Classes
    Params
  5. def explainParams(): String
    Definition Classes
    Params
  6. final def extractParamMap(): ParamMap
    Definition Classes
    Params
  7. final def extractParamMap(extra: ParamMap): ParamMap
    Definition Classes
    Params
  8. final def get[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  9. def getBoundaryRegex: String
  10. final def getDefault[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  11. def getInputCol: String

    Definition Classes
    HasInputCol
  12. def getMaximumPageLength: Int
  13. def getMinimumPageLength: Int
  14. final def getOrDefault[T](param: Param[T]): T
    Definition Classes
    Params
  15. def getOutputCol: String

    Definition Classes
    HasOutputCol
  16. def getParam(paramName: String): Param[Any]
    Definition Classes
    Params
  17. final def hasDefault[T](param: Param[T]): Boolean
    Definition Classes
    Params
  18. def hasParam(paramName: String): Boolean
    Definition Classes
    Params
  19. val inputCol: Param[String]

    The name of the input column

    The name of the input column

    Definition Classes
    HasInputCol
  20. final def isDefined(param: Param[_]): Boolean
    Definition Classes
    Params
  21. final def isSet(param: Param[_]): Boolean
    Definition Classes
    Params
  22. def logClass(): Unit
    Definition Classes
    BasicLogging
  23. def logFit[T](f: ⇒ T): T
    Definition Classes
    BasicLogging
  24. def logPredict[T](f: ⇒ T): T
    Definition Classes
    BasicLogging
  25. def logTrain[T](f: ⇒ T): T
    Definition Classes
    BasicLogging
  26. def logTransform[T](f: ⇒ T): T
    Definition Classes
    BasicLogging
  27. def logVerb[T](verb: String, f: ⇒ T): T
    Definition Classes
    BasicLogging
  28. def makePyFile(conf: CodegenConfig): Unit
    Definition Classes
    PythonWrappable
  29. def makeRFile(conf: CodegenConfig): Unit
    Definition Classes
    RWrappable
  30. val maximumPageLength: IntParam
  31. val minimumPageLength: IntParam
  32. val outputCol: Param[String]

    The name of the output column

    The name of the output column

    Definition Classes
    HasOutputCol
  33. lazy val params: Array[Param[_]]
    Definition Classes
    Params
  34. def pyAdditionalMethods: String
    Definition Classes
    PythonWrappable
  35. def pyInitFunc(): String
    Definition Classes
    PythonWrappable
  36. def save(path: String): Unit
    Definition Classes
    MLWritable
    Annotations
    @Since( "1.6.0" ) @throws( ... )
  37. final def set[T](param: Param[T], value: T): PageSplitter.this.type
    Definition Classes
    Params
  38. def setBoundaryRegex(v: String): PageSplitter.this.type
  39. def setInputCol(value: String): PageSplitter.this.type

    Definition Classes
    HasInputCol
  40. def setMaximumPageLength(v: Int): PageSplitter.this.type
  41. def setMinimumPageLength(v: Int): PageSplitter.this.type
  42. def setOutputCol(value: String): PageSplitter.this.type

    Definition Classes
    HasOutputCol
  43. def split(textOpt: String): Seq[String]
  44. def toString(): String
    Definition Classes
    Identifiable → AnyRef → Any
  45. def transform(dataset: Dataset[_]): DataFrame
    Definition Classes
    PageSplitter → Transformer
  46. def transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame
    Definition Classes
    Transformer
    Annotations
    @Since( "2.0.0" )
  47. def transform(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): DataFrame
    Definition Classes
    Transformer
    Annotations
    @Since( "2.0.0" ) @varargs()
  48. def transformSchema(schema: StructType): StructType
    Definition Classes
    PageSplitter → PipelineStage
  49. val uid: String
    Definition Classes
    PageSplitter → BasicLogging → Identifiable
  50. val ver: String
    Definition Classes
    BasicLogging
  51. def write: MLWriter
    Definition Classes
    DefaultParamsWritable → MLWritable