Object

com.microsoft.ml.spark

BinaryFileReader

Related Doc: package spark

Permalink

object BinaryFileReader

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. BinaryFileReader
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  5. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  6. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  7. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  8. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  9. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  10. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  11. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  12. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  13. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  14. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  15. def read(path: String, recursive: Boolean, spark: SparkSession, sampleRatio: Double = 1, inspectZip: Boolean = true, seed: Long = 0L): DataFrame

    Permalink

    Read the directory of binary files from the local or remote source into a DataFrame

    path

    Path to the directory

    recursive

    Recursive search flag

    returns

    DataFrame with a single column of "binaryFiles", see "columnSchema" for details

  16. def readFromPaths(df: DataFrame, pathCol: String, bytesCol: String, concurrency: Int, timeout: Int): DataFrame

    Permalink

    df

    the dataframe containing the paths

    pathCol

    the column name of the paths to read

    bytesCol

    the column name of the resulting bytes column

    concurrency

    the number of concurrent reads

    timeout

    the timeout for each read, in milliseconds

  17. def recursePath(fileSystem: FileSystem, path: Path, pathFilter: (FileStatus) ⇒ Boolean): Array[Path]

    Permalink
  18. def stream(path: String, spark: SparkSession, sampleRatio: Double = 1, inspectZip: Boolean = true, seed: Long = 0L): DataFrame

    Permalink

    Read the directory of binary files from the local or remote source as a streaming DataFrame

    path

    Path to the directory

    returns

    DataFrame with a single column of "binaryFiles", see "columnSchema" for details

  19. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  20. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  21. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  22. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  23. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from AnyRef

Inherited from Any

Ungrouped