object SamplingUtils
- Alphabetic
- By Inheritance
- SamplingUtils
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
randomResample(input: Iterator[ColumnarBatch], fraction: Double, sorter: GpuSorter, converter: (Iterator[ColumnarBatch]) ⇒ Iterator[InternalRow], seed: Long = Random.nextLong()): Array[InternalRow]
Random sampling without replacement.
Random sampling without replacement.
- input
iterator to feed batches for sampling.
- fraction
the fraction of rows to randomly select
- sorter
used to add rows needed for sorting on the CPU later. The sorter should be set up for the schema of the input data, and the sampled output rows will have any needed rows added to them as the sorter requires.
- converter
used to convert a batch of data to rows. This should have been set up to convert to rows based on the expected output for the sorter.
- seed
the seed to the random number generator
- returns
the sampled rows
-
def
reservoirSampleAndCount(input: Iterator[ColumnarBatch], k: Int, sorter: GpuSorter, converter: (Iterator[ColumnarBatch]) ⇒ Iterator[InternalRow], seed: Long = Random.nextLong()): (Array[InternalRow], Long)
Reservoir sampling implementation that also returns the input size.
Reservoir sampling implementation that also returns the input size.
- input
iterator to feed batches for sampling.
- k
the number of rows to randomly select.
- sorter
used to add rows needed for sorting on the CPU later. The sorter should be set up for the schema of the input data, and the sampled output rows will have any needed rows added to them as the sorter requires.
- converter
used to convert a batch of data to rows. This should have been set up to convert to rows based on the expected output for the sorter.
- seed
the seed to the random number generator
- returns
(samples, input size)
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()