class BigSizedJoinIterator extends Iterator[ColumnarBatch] with TaskAutoCloseableResource
Iterator that produces the result of a large symmetric join where the build side of the join is too large for a single GPU batch. The prior join input probing phase has sized the build side of the join, so this iterator partitions both the build side and the stream side into N+1 partitions, where N is the size of the build side divided by the target GPU batch size.
Once the build side is partitioned completely, the partitions are placed into "join groups" where all the build side data of a join group fits in the GPU target batch size. If the input data is skewed, a single build partition could be larger than the target GPU batch size. Currently such oversized partitions are placed in separate join groups, each consisting of just one partition, in the hope that there will be enough GPU memory to proceed with the join despite the skew. We will need to revisit this for very large, skewed build side data arriving at a single task.
Once the build side join groups are identified, each stream batch is partitioned into the same number of partitions as the build side, using the same hash key that was used for the build side. The partitions from the batch are grouped into join groups matching the partition grouping from the build side, and each join group is processed as a sub-join. Once all the join groups for a stream batch have been processed, the next stream batch is fetched, partitioned, and sub-joins are processed against the build side join groups. This repeats until the stream side is exhausted.
- Alphabetic
- By Inheritance
- BigSizedJoinIterator
- TaskAutoCloseableResource
- AutoCloseable
- Iterator
- TraversableOnce
- GenTraversableOnce
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
Type Members
-
class
GroupedIterator[B >: A] extends AbstractIterator[Seq[B]] with Iterator[Seq[B]]
- Definition Classes
- Iterator
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
def
++[B >: ColumnarBatch](that: ⇒ GenTraversableOnce[B]): Iterator[B]
- Definition Classes
- Iterator
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
addString(b: StringBuilder): StringBuilder
- Definition Classes
- TraversableOnce
-
def
addString(b: StringBuilder, sep: String): StringBuilder
- Definition Classes
- TraversableOnce
-
def
addString(b: StringBuilder, start: String, sep: String, end: String): StringBuilder
- Definition Classes
- TraversableOnce
-
def
aggregate[B](z: ⇒ B)(seqop: (B, ColumnarBatch) ⇒ B, combop: (B, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
buffered: BufferedIterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
close(): Unit
- Definition Classes
- BigSizedJoinIterator → TaskAutoCloseableResource → AutoCloseable
-
val
closed: Boolean
- Attributes
- protected
- Definition Classes
- TaskAutoCloseableResource
-
def
collect[B](pf: PartialFunction[ColumnarBatch, B]): Iterator[B]
- Definition Classes
- Iterator
- Annotations
- @migration
- Migration
(Changed in version 2.8.0)
collect has changed. The previous behavior can be reproduced with toSeq.
-
def
collectFirst[B](pf: PartialFunction[ColumnarBatch, B]): Option[B]
- Definition Classes
- TraversableOnce
-
def
contains(elem: Any): Boolean
- Definition Classes
- Iterator
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B], start: Int, len: Int): Unit
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B]): Unit
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
copyToArray[B >: ColumnarBatch](xs: Array[B], start: Int): Unit
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
copyToBuffer[B >: ColumnarBatch](dest: Buffer[B]): Unit
- Definition Classes
- TraversableOnce
-
def
corresponds[B](that: GenTraversableOnce[B])(p: (ColumnarBatch, B) ⇒ Boolean): Boolean
- Definition Classes
- Iterator
-
def
count(p: (ColumnarBatch) ⇒ Boolean): Int
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
drop(n: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
dropWhile(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
duplicate: (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
exists(p: (ColumnarBatch) ⇒ Boolean): Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
filter(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
filterNot(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
find(p: (ColumnarBatch) ⇒ Boolean): Option[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
flatMap[B](f: (ColumnarBatch) ⇒ GenTraversableOnce[B]): Iterator[B]
- Definition Classes
- Iterator
-
def
fold[A1 >: ColumnarBatch](z: A1)(op: (A1, A1) ⇒ A1): A1
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
foldLeft[B](z: B)(op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
foldRight[B](z: B)(op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
forall(p: (ColumnarBatch) ⇒ Boolean): Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
foreach[U](f: (ColumnarBatch) ⇒ U): Unit
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
grouped[B >: ColumnarBatch](size: Int): GroupedIterator[B]
- Definition Classes
- Iterator
-
def
hasDefiniteSize: Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
hasNext: Boolean
- Definition Classes
- BigSizedJoinIterator → Iterator
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
indexOf[B >: ColumnarBatch](elem: B, from: Int): Int
- Definition Classes
- Iterator
-
def
indexOf[B >: ColumnarBatch](elem: B): Int
- Definition Classes
- Iterator
-
def
indexWhere(p: (ColumnarBatch) ⇒ Boolean, from: Int): Int
- Definition Classes
- Iterator
-
def
indexWhere(p: (ColumnarBatch) ⇒ Boolean): Int
- Definition Classes
- Iterator
-
def
isEmpty: Boolean
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraversableAgain: Boolean
- Definition Classes
- Iterator → GenTraversableOnce
-
def
length: Int
- Definition Classes
- Iterator
-
def
map[B](f: (ColumnarBatch) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
max[B >: ColumnarBatch](implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
maxBy[B](f: (ColumnarBatch) ⇒ B)(implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
min[B >: ColumnarBatch](implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
minBy[B](f: (ColumnarBatch) ⇒ B)(implicit cmp: Ordering[B]): ColumnarBatch
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString: String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString(sep: String): String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
mkString(start: String, sep: String, end: String): String
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
next(): ColumnarBatch
- Definition Classes
- BigSizedJoinIterator → Iterator
-
def
nonEmpty: Boolean
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
padTo[A1 >: ColumnarBatch](len: Int, elem: A1): Iterator[A1]
- Definition Classes
- Iterator
-
def
partition(p: (ColumnarBatch) ⇒ Boolean): (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
def
patch[B >: ColumnarBatch](from: Int, patchElems: Iterator[B], replaced: Int): Iterator[B]
- Definition Classes
- Iterator
-
def
product[B >: ColumnarBatch](implicit num: Numeric[B]): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduce[A1 >: ColumnarBatch](op: (A1, A1) ⇒ A1): A1
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceLeft[B >: ColumnarBatch](op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce
-
def
reduceLeftOption[B >: ColumnarBatch](op: (B, ColumnarBatch) ⇒ B): Option[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceOption[A1 >: ColumnarBatch](op: (A1, A1) ⇒ A1): Option[A1]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceRight[B >: ColumnarBatch](op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reduceRightOption[B >: ColumnarBatch](op: (ColumnarBatch, B) ⇒ B): Option[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
reversed: List[ColumnarBatch]
- Attributes
- protected[this]
- Definition Classes
- TraversableOnce
-
def
sameElements(that: Iterator[_]): Boolean
- Definition Classes
- Iterator
-
def
scanLeft[B](z: B)(op: (B, ColumnarBatch) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
scanRight[B](z: B)(op: (ColumnarBatch, B) ⇒ B): Iterator[B]
- Definition Classes
- Iterator
-
def
seq: Iterator[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
size: Int
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
sizeHintIfCheap: Int
- Attributes
- protected[collection]
- Definition Classes
- GenTraversableOnce
-
def
slice(from: Int, until: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
sliceIterator(from: Int, until: Int): Iterator[ColumnarBatch]
- Attributes
- protected
- Definition Classes
- Iterator
-
def
sliding[B >: ColumnarBatch](size: Int, step: Int): GroupedIterator[B]
- Definition Classes
- Iterator
-
def
span(p: (ColumnarBatch) ⇒ Boolean): (Iterator[ColumnarBatch], Iterator[ColumnarBatch])
- Definition Classes
- Iterator
-
def
sum[B >: ColumnarBatch](implicit num: Numeric[B]): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
take(n: Int): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
takeWhile(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
to[Col[_]](implicit cbf: CanBuildFrom[Nothing, ColumnarBatch, Col[ColumnarBatch]]): Col[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toArray[B >: ColumnarBatch](implicit arg0: ClassTag[B]): Array[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toBuffer[B >: ColumnarBatch]: Buffer[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIndexedSeq: IndexedSeq[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIterable: Iterable[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toIterator: Iterator[ColumnarBatch]
- Definition Classes
- Iterator → GenTraversableOnce
-
def
toList: List[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toMap[T, U](implicit ev: <:<[ColumnarBatch, (T, U)]): Map[T, U]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toSeq: Seq[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toSet[B >: ColumnarBatch]: Set[B]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
toStream: Stream[ColumnarBatch]
- Definition Classes
- Iterator → GenTraversableOnce
-
def
toString(): String
- Definition Classes
- Iterator → AnyRef → Any
-
def
toTraversable: Traversable[ColumnarBatch]
- Definition Classes
- Iterator → TraversableOnce → GenTraversableOnce
-
def
toVector: Vector[ColumnarBatch]
- Definition Classes
- TraversableOnce → GenTraversableOnce
-
def
use[T <: AutoCloseable](ac: T): T
- Definition Classes
- TaskAutoCloseableResource
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
withFilter(p: (ColumnarBatch) ⇒ Boolean): Iterator[ColumnarBatch]
- Definition Classes
- Iterator
-
def
zip[B](that: Iterator[B]): Iterator[(ColumnarBatch, B)]
- Definition Classes
- Iterator
-
def
zipAll[B, A1 >: ColumnarBatch, B1 >: B](that: Iterator[B], thisElem: A1, thatElem: B1): Iterator[(A1, B1)]
- Definition Classes
- Iterator
-
def
zipWithIndex: Iterator[(ColumnarBatch, Int)]
- Definition Classes
- Iterator
Deprecated Value Members
-
def
/:[B](z: B)(op: (B, ColumnarBatch) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
- Annotations
- @deprecated
- Deprecated
(Since version 2.12.10) Use foldLeft instead of /:
-
def
:\[B](z: B)(op: (ColumnarBatch, B) ⇒ B): B
- Definition Classes
- TraversableOnce → GenTraversableOnce
- Annotations
- @deprecated
- Deprecated
(Since version 2.12.10) Use foldRight instead of :\