package ai.minxiao.ds4s.core.dl4j.vectorization

import org.deeplearning4j.datasets.iterator.impl.EmnistDataSetIterator
import org.deeplearning4j.datasets.iterator.impl.EmnistDataSetIterator.{Set => EmnistDataSet}

/**
  * EmnistIterator
  *
  * <a href="https://arxiv.org/pdf/1702.05373.pdf">EMNIST: an extension of MNIST to handwritten letters</a><br/>
  * <a href="https://www.nist.gov/itl/iad/image-group/emnist-dataset">The EMNIST Dataset</a><br/>
  * <table style="width:50%">
  * <caption align="top|left">STRUCTURE AND ORGANIZATION OF THE EMNIST DATASETS.</caption>
  *   <tr><th>Name</th><th>Classes</th><th>No. Training</th><th>No. Testing</th><th>Validation</th><th>Total</th></tr>
  *   <tr><th>By_Class</th><th>62</th><th>697,932</th><th>116,323</th><th>No</th><th>814,255</th></tr>
  *   <tr><th>By_Merge</th><th>47</th><th>697,932</th><th>116,323</th><th>No</th><th>814,255</th></tr>
  *   <tr><th>Balanced</th><th>47</th><th>112,800</th><th>18,800</th><th>Yes</th><th>131,600</th></tr>
  *   <tr><th>Digits</th><th>10</th><th>240,000</th><th>40,000</th><th>Yes</th><th>280,000</th></tr>
  *   <tr><th>Letters</th><th>37</th><th>88,800</th><th>14,800</th><th>Yes</th><th>103,600</th></tr>
  *   <tr><th>MNIST</th><th>10</th><th>60,000</th><th>10,000</th><th>Yes</th><th>70,000</th></tr>
  * </table>
  *
  * @author mx
  */
object EmnistIterator {

  /**
    * Creates an `EmnistDataSetIterator` for one of the EMNIST subsets.
    *
    * Every argument is handed, unchanged and in declaration order, to the
    * six-argument `EmnistDataSetIterator` constructor; no additional logic is
    * applied here.
    *
    * @param dataset   which EMNIST subset to iterate over, given as an
    *                  `EmnistDataSet` value (alias of `EmnistDataSetIterator.Set`):
    * {{{
    * COMPLETE: 62 unbalanced classes, 814,255 examples total (train + test).
    * MERGE:    47 unbalanced classes, 814,255 examples total. Lower and upper case
    *           characters that are difficult to distinguish are merged into one class
    *           per letter (instead of 2), for letters C, I, J, K, L, M, O, P, S, U, V,
    *           W, X, Y and Z.
    * BALANCED: 47 balanced classes,   131,600 examples total.
    * LETTERS:  26 balanced classes,   145,600 examples total.
    * DIGITS:   10 balanced classes,   280,000 examples total.
    * MNIST:    10 balanced classes,    70,000 examples total. Equivalent to the
    *           original MNIST dataset in MnistDataSetIterator.
    * }}}
    *                  NOTE(review): the LETTERS figures above disagree with the summary
    *                  table in this object's Scaladoc (37 classes, 103,600 examples) --
    *                  confirm which is correct.
    * @param batchSize number of examples per batch
    * @param binarize  whether to binarize the features (defaults to `false`)
    * @param train     `true` for the training split, `false` for the test split
    *                  (defaults to `true`)
    * @param shuffle   whether to shuffle the dataset (defaults to `true`)
    * @param seed      seed for the random generator (defaults to `2018L`)
    * @return the newly constructed dataset iterator
    */
  def run(
      dataset: EmnistDataSet,
      batchSize: Int,
      binarize: Boolean = false,
      train: Boolean = true,
      shuffle: Boolean = true,
      seed: Long = 2018L
  ): EmnistDataSetIterator = {
    // Pure delegation: the argument order mirrors the constructor's parameter order.
    new EmnistDataSetIterator(dataset, batchSize, binarize, train, shuffle, seed)
  }
}
