object ClusterUtil
Linear Supertypes: AnyRef, Any
Value Members
- final def !=(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- final def ##(): Int
  Definition Classes: AnyRef → Any
- final def ==(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- final def asInstanceOf[T0]: T0
  Definition Classes: Any
- def clone(): AnyRef
  Attributes: protected[lang]
  Definition Classes: AnyRef
  Annotations: @throws( ... ) @native()
- final def eq(arg0: AnyRef): Boolean
  Definition Classes: AnyRef
- def equals(arg0: Any): Boolean
  Definition Classes: AnyRef → Any
- def finalize(): Unit
  Attributes: protected[lang]
  Definition Classes: AnyRef
  Annotations: @throws( classOf[java.lang.Throwable] )
- final def getClass(): Class[_]
  Definition Classes: AnyRef → Any
  Annotations: @native()
- def getDefaultNumExecutorCores(spark: SparkSession, log: Logger, master: Option[String] = None): Int
  Gets the default number of cores per executor from the SparkSession (required) or from the master URL (optional).
  - spark
    The current Spark session. If the master parameter is not set, the master from the Spark session is used.
  - master
    This parameter exists for unit testing. If set, the function computes the result from its value; if not set, the master from the SparkSession is used.
  - returns
    The default number of cores per executor, based on the master.
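  A minimal usage sketch, not taken from the library's docs: it assumes the Logger parameter is an org.slf4j.Logger and that ClusterUtil has already been imported from its package (the package path is not shown on this page).

    import org.apache.spark.sql.SparkSession
    import org.slf4j.LoggerFactory

    object DefaultCoresExample {
      private val log = LoggerFactory.getLogger(getClass)

      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("cluster-util-example")
          .master("local[4]") // four local cores, for illustration
          .getOrCreate()

        // Derive the default cores per executor from the session's master.
        val cores = ClusterUtil.getDefaultNumExecutorCores(spark, log)

        // Pass a master explicitly, as a unit test would.
        val testCores = ClusterUtil.getDefaultNumExecutorCores(spark, log, Some("local[2]"))

        log.info(s"default cores: $cores, with explicit master: $testCores")
        spark.stop()
      }
    }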
- def getDriverHost(spark: SparkSession): String
- def getExecutors(spark: SparkSession): Array[(Int, String)]
  Returns the list of executors as (id, host) pairs.
  - spark
    The current Spark session.
  - returns
    The executors as an array of (id, host) tuples.
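  A short spark-shell style sketch (where spark is the predefined session), assuming ClusterUtil is in scope:

    // List the executors registered with the current session.
    val executors: Array[(Int, String)] = ClusterUtil.getExecutors(spark)
    executors.foreach { case (id, host) =>
      println(s"executor $id runs on host $host")
    }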
- def getHostToIP(hostname: String): String
- def getJVMCPUs(spark: SparkSession): Int
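  These three members are undocumented here; the sketch below infers their behavior from their names and signatures, so treat it as an assumption. spark-shell style, with ClusterUtil in scope:

    // Inferred semantics: driver hostname, hostname-to-IP resolution,
    // and the CPU count visible to the JVM.
    val driverHost = ClusterUtil.getDriverHost(spark)
    val driverIp = ClusterUtil.getHostToIP(driverHost)
    val jvmCpus = ClusterUtil.getJVMCPUs(spark)
    println(s"driver=$driverHost ($driverIp), jvmCpus=$jvmCpus")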
- def getNumExecutorTasks(spark: SparkSession, numTasksPerExec: Int, log: Logger): Int
  Returns the number of executors multiplied by the number of tasks per executor.
  - spark
    The current Spark session.
  - numTasksPerExec
    The number of tasks per executor.
  - returns
    The number of executors multiplied by the number of tasks per executor.
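  A spark-shell style sketch, assuming an org.slf4j.Logger and ClusterUtil in scope; the value 2 for numTasksPerExec is an arbitrary illustration (in practice it might come from getNumTasksPerExecutor below):

    import org.slf4j.LoggerFactory

    val log = LoggerFactory.getLogger("ClusterUtilExample")
    // Total task slots = number of executors * tasks per executor.
    val totalSlots = ClusterUtil.getNumExecutorTasks(spark, 2, log)
    println(s"total task slots: $totalSlots")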
- def getNumRowsPerPartition(df: DataFrame, labelCol: Column): Array[Long]
  Gets the number of rows per partition of a DataFrame. Note that this executes a full distributed Spark query.
  - df
    The DataFrame.
  - returns
    The number of rows per partition (where the partition id is the array index).
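  A spark-shell style sketch, assuming ClusterUtil is in scope; the label column here is purely illustrative:

    import org.apache.spark.sql.functions.col

    val df = spark.range(0, 1000).toDF("label").repartition(4)
    // Runs a distributed query, so expect a full Spark job.
    val rowCounts: Array[Long] = ClusterUtil.getNumRowsPerPartition(df, col("label"))
    rowCounts.zipWithIndex.foreach { case (n, pid) =>
      println(s"partition $pid holds $n rows")
    }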
- def getNumTasksPerExecutor(spark: SparkSession, log: Logger): Int
  Gets the number of tasks per executor by running a dummy dataset. Note: all executors are assumed to have the same number of cores, which makes this more reliable than reading the value from the configuration.
  - spark
    The current Spark session.
  - log
    The Logger.
  - returns
    The number of tasks per executor.
- def getTaskCpus(sparkContext: SparkContext, log: Logger): Int
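  A spark-shell style sketch covering the two preceding members, assuming an org.slf4j.Logger and ClusterUtil in scope; that getTaskCpus reflects the per-task CPU setting (cf. spark.task.cpus) is an inference from its name:

    import org.slf4j.LoggerFactory

    val log = LoggerFactory.getLogger("ClusterUtilExample")
    // Measured via a dummy dataset rather than read from configuration.
    val tasksPerExec = ClusterUtil.getNumTasksPerExecutor(spark, log)
    // Presumably the CPUs reserved per task.
    val taskCpus = ClusterUtil.getTaskCpus(spark.sparkContext, log)
    println(s"tasks/executor=$tasksPerExec, cpus/task=$taskCpus")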
- def hashCode(): Int
  Definition Classes: AnyRef → Any
  Annotations: @native()
- final def isInstanceOf[T0]: Boolean
  Definition Classes: Any
- final def ne(arg0: AnyRef): Boolean
  Definition Classes: AnyRef
- final def notify(): Unit
  Definition Classes: AnyRef
  Annotations: @native()
- final def notifyAll(): Unit
  Definition Classes: AnyRef
  Annotations: @native()
- final def synchronized[T0](arg0: ⇒ T0): T0
  Definition Classes: AnyRef
- def toString(): String
  Definition Classes: AnyRef → Any
- final def wait(): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... )
- final def wait(arg0: Long, arg1: Int): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... )
- final def wait(arg0: Long): Unit
  Definition Classes: AnyRef
  Annotations: @throws( ... ) @native()