Class

ai.minxiao.ds4s.core.dl4j.mlnn

FCNN

Related Doc: package mlnn

Permalink

class FCNN extends MLNN with Serializable

Fully Connected Neural Network (FCNN)


BASE

Annotations
@SerialVersionUID()
Linear Supertypes
MLNN, NNBase, Serializable, Serializable, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. FCNN
  2. MLNN
  3. NNBase
  4. Serializable
  5. Serializable
  6. AnyRef
  7. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new FCNN(seed: Long = 2018L, l2: Double = 0.0, l1: Double = 0.0, l2Bias: Double = 0.0, l1Bias: Double = 0.0, weightNoise: Boolean = false, weightRetainProbability: Double = 1.0, applyToBiases: Boolean = false, optimizationAlgo: OptimizationAlgorithm = ..., miniBatch: Boolean = true, learningRate: Double = 0.1, beta1: Double = 0.9, beta2: Double = 0.999, epsilon: Double = 1E-8, momentum: Double = 0.9, rmsDecay: Double = 0.95, rho: Double = 0.95, updater: Updater = Updater.NESTEROVS, gradientNormalization: GradientNormalization = GradientNormalization.None, gradientNormalizationThreshold: Double = 1.0, inputSize: Int, inputType: Type = InputType.Type.FF, height: Int = 10, width: Int = 10, depth: Int = 10, channels: Int = 3, outputSize: Int, lossFunction: LossFunction, outputLayerActivation: Activation, outputLayerWeightInit: WeightInit = WeightInit.XAVIER, outputLayerBiasInit: Double = 0.0, weights: Array[Double] = Array[Double](), pretrain: Boolean = false, backprop: Boolean = true, backpropType: BackpropType = BackpropType.Standard, tBPTTForwardLength: Int = 100, tBPTTBackwardLength: Int = 100, setListener: Boolean = true, listenType: String = "console", listenFreq: Int = 1, storagePath: String = "", enableRemote: Boolean = false, nDenseLayers: Int = 1, denseLayerSizes: Array[Int] = Array(1), denseLayerActivations: Array[Activation] = Array(Activation.RELU), denseLayerWeightInits: Array[WeightInit] = Array(WeightInit.XAVIER), denseLayerBiasInits: Array[Double] = Array(0.0), denseLayerDropOuts: Array[Double] = Array(0.0))

    Permalink

    seed

    random generator seed, default=2018 ------------------------------------------------------------------------------------------------------------ REGULARIZATION

    l2

    l2 regularization, default=0.0

    l1

    l1 regularization, default=0.0

    l2Bias

    l2 bias term, default=0.0

    l1Bias

    l1 bias term, default=0.0

    weightNoise

    whether to use weight noise (drop connect), default=false

    weightRetainProbability

    weight retain probability for the weight noise (drop-connect), default=1 (no drop-connect)

    applyToBiases

    whether apply to biases for the weight noise (drop-connect), default=false ------------------------------------------------------------------------------------------------------------------ OPTIMIZATION

    optimizationAlgo

    optimization algorithm (default=STOCHASTIC_GRADIENT_DESCENT)

    STOCHASTIC_GRADIENT_DESCENT://StochasticGradientDescent.java
    LINE_GRADIENT_DESCENT://LineGradientDescent.java
    CONJUGATE_GRADIENT://ConjugateGradient.java
    LBFGS://LBFGS.java
    miniBatch

    whether to use mini-batch, default=true

    learningRate

    learning rate, default=0.1

    beta1

    gradient moving avg decay rate, default=0.9

    beta2

    gradient sqrt decay rate, default=0.999

    epsilon

    default=1E-8

    momentum

    NESTEROVS momentum, default=0.9

    rmsDecay

    RMSPROP decay rate, default=0.95

    rho

    ADADELTA decay rate, default=0.95

    updater

    weights updater, (default = NESTEROVS). Options:

    SGD: //Sgd.java
      learningRate: learning rate (default = 1E-3)
    ADAM: //Adam.java
      learningRate: learning rate, DEFAULT_ADAM_LEARNING_RATE = 1e-3;
      beta1: gradient moving avg decay rate, DEFAULT_ADAM_BETA1_MEAN_DECAY = 0.9;
      beta2: gradient sqrt decay rate, DEFAULT_ADAM_BETA2_VAR_DECAY = 0.999;
      epsilon: epsilon, DEFAULT_ADAM_EPSILON = 1e-8;
      //Adam: A Method for Stochastic Optimization
    ADAMAX: //AdaMax.java
      learningRate: learning rate, DEFAULT_ADAMAX_LEARNING_RATE = 1e-3;
      beta1: gradient moving avg decay rate, DEFAULT_ADAMAX_BETA1_MEAN_DECAY = 0.9;
      beta2: gradient sqrt decay rate, DEFAULT_ADAMAX_BETA2_VAR_DECAY = 0.999;
      epsilon: epsilon, DEFAULT_ADAMAX_EPSILON = 1e-8;
      //Adam: A Method for Stochastic Optimization
    NADAM://Nadam.java
      learningRate: learning rate, DEFAULT_NADAM_LEARNING_RATE = 1e-3;
      epsilon: DEFAULT_NADAM_EPSILON = 1e-8;
      beta1: gradient moving avg decay rate, DEFAULT_NADAM_BETA1_MEAN_DECAY = 0.9;
      beta2: gradient sqrt decay rate, DEFAULT_NADAM_BETA2_VAR_DECAY = 0.999;
      //An overview of gradient descent optimization algorithms
    AMSGRAD: //AMSGrad.java
      learningRate: learning rate, DEFAULT_AMSGRAD_LEARNING_RATE = 1e-3;
      epsilon: DEFAULT_AMSGRAD_EPSILON = 1e-8;
      beta1: DEFAULT_AMSGRAD_BETA1_MEAN_DECAY = 0.9;
      beta2: DEFAULT_AMSGRAD_BETA2_VAR_DECAY = 0.999;
    ADAGRAD: Vectorized Learning Rate used per Connection Weight//AdaGrad.java
      learningRate: learning rate, DEFAULT_ADAGRAD_LEARNING_RATE = 1e-1;
      epsilon: DEFAULT_ADAGRAD_EPSILON = 1e-6;
      //Adaptive Subgradient Methods for Online Learning and Stochastic Optimization
      //Adagrad – eliminating learning rates in stochastic gradient descent
    NESTEROVS: tracks previous layer's gradient and uses it as a way of updating the gradient //Nesterovs.java
      learningRate: learning rate, DEFAULT_NESTEROV_LEARNING_RATE = 0.1;
      momentum: DEFAULT_NESTEROV_MOMENTUM = 0.9;
    RMSPROP: //RmsProp.java
      learningRate: learning rate, DEFAULT_RMSPROP_LEARNING_RATE = 1e-1;
      epsilon: DEFAULT_RMSPROP_EPSILON = 1e-8;
      rmsDecay: decay rate, DEFAULT_RMSPROP_RMSDECAY = 0.95;
      //Neural Networks for Machine Learning
    ADADELTA: //AdaDelta.java
      rho: decay rate, controlling the decay of the previous parameter updates, DEFAULT_ADADELTA_RHO = 0.95;
      epsilon: DEFAULT_ADADELTA_EPSILON = 1e-6;
      (no need to manually set the learning rate)
      //ADADELTA: AN ADAPTIVE LEARNING RATE METHOD
    NONE: no updates //NoOp.java
    gradientNormalization

    gradient normalization, default=None Options: GradientNormalization.X

    ClipElementWiseAbsoluteValue:
 g <- sign(g)*min(maxAllowedValue, |g|).
    ClipL2PerLayer:
      GOut = G                             if l2Norm(G) < threshold (i.e., no change)
      GOut = threshold * G / l2Norm(G)     otherwise
    ClipL2PerParamType: conditional renormalization. Very similar to ClipL2PerLayer, however instead of clipping per layer, do clipping on each parameter type separately.
    None: no gradient normalization
    RenormalizeL2PerLayer: rescale gradients by dividing by the L2 norm of all gradients for the layer
    RenormalizeL2PerParamType:
     GOut_weight = G_weight / l2(G_weight)
     GOut_bias = G_bias / l2(G_bias)
    gradientNormalizationThreshold

    gradient threshold, default=1.0 ------------------------------------------------------------------------------------------------------------------------------------------ INPUT LAYER

    inputSize

    input size, required

    inputType

    input type, default=InputType.Type.FF Options:

    InputType.Type.FF: Standard feed-forward (2d minibatch, 1d per example) data
    InputType.Type.CNN: 2D Convolutional neural network (4d minibatch, [miniBatchSize, channels, height, width])
    InputType.Type.CNN3D: 3D convolutional neural network (5d minibatch, [miniBatchSize, channels, height, width, depth])
    InputType.Type.CNNFlat: Flattened 2D conv net data (2d minibatch, [miniBatchSize, height * width * channels])
    InputType.Type.RNN: Recurrent neural network (3d minibatch) time series data
    height

    height of input, default=10

    width

    width of input, default=10

    depth

    depth of input, default=10

    channels

    number of channels, default=3 ------------------------------------------------------------------------------------------------------------------------------------------ OUTPUT LAYER

    outputSize

    output size, required

    lossFunction

    loss function for the output layer, required Options: y-true, yHat-prediction

    L2: Sum of Squared Errors//LossL2.java
      L = sum_i (y_i - yHat_i)^2
    MSE (or SQUARED_LOSS): Mean Squared Error//LossMSE.java
      L = 1/(2N) sum_i sum_j (y_{i,j} - yHat_{i,j})^2
    L1: Sum of Absolute Errors//LossL1.java
      L = sum_i |y_i - yHat_i|
    MEAN_ABSOLUTE_ERROR: Mean Absolute Error//LossMAE.java
      L = 1/(2N) sum_i sum_j |y_{i,j} - yHat_{i,j}|
    MEAN_ABSOLUTE_PERCENTAGE_ERROR: Mean Absolute Percentage Error//LossMAPE.java
      L = 1/N sum_i |y_i - yHat_i|*100/|y_i|
    MEAN_SQUARED_LOGARITHMIC_ERROR: Mean Squared Logarithmic Error//LossMSLE.java
      L = 1/N sum_i (log(1 + y_i) - log(1 + yHat_i))^2
    POISSON (or EXPLL): Exponential Log Likelihood Loss (Poisson Loss)//LossPoisson.java
      L = 1/N sum_i (yHat_i - y_i * log(yHat_i))
    XENT: Binary Cross Entropy Loss//LossBinaryXENT.java
      L = - 1/N sum_i (y_i*log(yHat_i) + (1 - y_i)*log(1 - yHat_i))
      (label scalar of 0/1 binary classes)
    MCXENT: Multiclass Cross Entropy Loss//LossMCXENT.java
      L = - 1/N \sum_i \sum_k y_{i,k} * log(yHat_{i, k})
      (label vector of 0/1 indicator labels)
    NEGATIVELOGLIKELIHOOD: Negative Log Likelihood//LossNegativeLogLikelihood.java
      L = - 1/N \sum_i \sum_k y_{i,k} * log(yHat_{i, k})
      (*negative log likelihood is equivalent to cross entropy mathematically)
    KL_DIVERGENCE (or RECONSTRUCTION_CROSSENTROPY): Kullback Leibler Divergence Loss//LossKLD.java
      L = - 1/N sum_i y_i * log (yHat_i / y_i)
        = 1/N sum_i y_i * log (y_i / yHat_i)
        = 1/N ( sum_i y_i * log(y_i) - sum_i y_i * log(yHat_i))
        =        entropy                    cross-entropy
    COSINE_PROXIMITY://LossCosineProximity.java
      L = - (sum_i y_i dotprod yHat_i)/(sqrt(sum_i y_i dotprod y_i) * sqrt(sum_i yHat_i dotprod yHat_i))
    HINGE: Hinge Loss//LossHinge.java
      L = 1/N sum_i max(0, 1 - yHat_i * y_i)
      (*label scalar of -1/+1 labels)
    SQUARED_HINGE: Squared Hinge Loss//LossSquaredHinge.java
      L = 1/N sum_i (max(0, 1 - yHat_i * y_i))^2
      (*label scalar of -1/+1 labels)
    outputLayerActivation

    output layer activation functions, required. Options:

    Cube://ActivationCube.java
      f(x) = x^3
    ELU://ActivationELU.java
             ⎧ alpha * (exp(x) - 1.0), x <  0; // alpha defaults to 1, if not specified
      f(x) = ⎨
             ⎩                      x, x >= 0;
    HARDSIGMOID://ActivationHardSigmoid.java
      f(x) = min(1, max(0, 0.2*x + 0.5))
    HARDTANH://ActivationHardTanH.java
             ⎧  1, if x >  1
      f(x) = ⎨ -1, if x < -1
             ⎩  x, otherwise
    IDENTITY://ActivationIdentity.java
      f(x) = x
    LEAKYRELU://ActivationLReLU.java
      f(x) = max(0, x) + alpha * min(0, x) // alpha defaults to 0.01
    RRELU://ActivationRReLU.java
      f(x) = max(0,x) + alpha * min(0, x)
        // alpha is drawn from uniform(l,u) during training and is set to (l+u)/2 during test
        // l and u default to 1/8 and 1/3 respectively
      // Empirical Evaluation of Rectified Activations in Convolutional Network
    RATIONALTANH://ActivationRationalTanh.java
      f(x) = 1.7159 * tanh(2x/3), where tanh is approximated as tanh(y) ~ sgn(y) * { 1 - 1/(1+|y|+y^2+1.41645*y^4)}
      //Reference
    RELU://ActivationReLU.java
      f(x) = max(0, x)
    //RELU6://ActivationReLU6.java
    //  f(x) = min(max(x, 0), 6)
    RECTIFIEDTANH://ActivationRectifiedTanh.java
      f(x) = max(0, tanh(x))
    SELU://ActivationSELU.java
                    ⎧                      x, x > 0
      f(x) = lambda ⎨
                    ⎩ alpha * exp(x) - alpha, x <= 0
      //Reference
    SIGMOID://ActivationSigmoid.java
      f(x) = 1 / (1 + exp(-x))
    SOFTPLUS://ActivationSoftPlus.java
      f(x) = log(1 + exp(x))
    SOFTSIGN://ActivationSoftSign.java
      f_i(x) = x_i / (1 + |x_i|)
    SOFTMAX://ActivationSoftmax.java
      f_i(x) = exp(x_i - shift) / sum_j exp(x_j - shift), where shift = max_i x_i
    SWISH://ActivationSwish.java
      f(x) = x * sigmoid(x)
    TANH: //ActivationTanH.java
      f(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
    outputLayerWeightInit

    output layer weight initialization, default=XAVIER. Options:

    ZERO: all 0s.
    ONES: all 1s.
    SIGMOID_UNIFORM: U(-r,r) with r=4*sqrt(6/(fanIn + fanOut)), A version of XAVIER_UNIFORM for sigmoid activation functions.
    NORMAL: N(0, sigma^2) with sigma = 1/sqrt(fanIn).
    LECUN_UNIFORM: U[-a,a] with a=3/sqrt(fanIn).
    UNIFORM: U[-a,a] with a=1/sqrt(fanIn).
    XAVIER: N(0, sigma^2) with sigma = sqrt(2.0/(fanIn + fanOut))
    XAVIER_UNIFORM: U(-s,s) with s = sqrt(6/(fanIn + fanOut))
    XAVIER_FAN_IN: N(0, sigma^2) with sigma = sqrt(1/fanIn)
    RELU: N(0, sigma^2) with sigma = sqrt(2.0/nIn)
    RELU_UNIFORM: U(-s,s) with s = sqrt(6/fanIn)
    IDENTITY: I_{nIn, nOut} an identity matrix, only applicable to square weight matrices
    VAR_SCALING_NORMAL_FAN_IN: N(0, sigma^2) with sigma = sqrt(1.0/fanIn)
    VAR_SCALING_NORMAL_FAN_OUT: N(0, sigma^2) with sigma = sqrt(1.0/fanOut)
    VAR_SCALING_NORMAL_FAN_AVG: N(0, sigma^2) with sigma = sqrt(1.0/((fanIn + fanOut)/2))
    VAR_SCALING_UNIFORM_FAN_IN: U[-a,a] with a=sqrt(3.0/fanIn)
    VAR_SCALING_UNIFORM_FAN_OUT: U[-a,a] with a=sqrt(3.0/fanOut)
    VAR_SCALING_UNIFORM_FAN_AVG: U[-a,a] with a=sqrt(3.0/((fanIn + fanOut)/2))
    outputLayerBiasInit

    output layer bias initialization, default=0.0

    weights

    instance weights-based on classes, applicable for weighted classification, default=Array[Double]() -------------------------------------------------------------------------------------------------------------------------------------------------- BASE FOR LAYERS

    pretrain

    whether to pretrain, default=false

    backprop

    whether to use backprop, default=true

    setListener

    whether to set a listener, default=true

    listenType

    listener type, default="console" Options:

    console: print in the console
    ui: display in the UI
    file: save to a file
    listenFreq

    listener frequency to track the score, default=1

    storagePath

    file path for saving the stats if set listenType="file", default="", not used

    enableRemote

    whether to enable remote listening, default=false -------------------------------------------------------------------------------------------- Fully Connected Dense Layers

    nDenseLayers

    number of dense layers, default=1

    denseLayerSizes

    sizes of dense layers, default=Array(1), for index beyond the boundary, use the last one

    denseLayerActivations

    activations of dense layers, default=Array(Activation.RELU)

    denseLayerWeightInits

    weight initializer of dense layers, default=Array(WeightInit.XAVIER)

    denseLayerBiasInits

    bias initializer of dense layers, default=Array(0.0)

    denseLayerDropOuts

    dropouts of dense layers, default=Array(0.0)

Value Members

  1. final def !=(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  4. final def asInstanceOf[T0]: T0

    Permalink
    Definition Classes
    Any
  5. lazy val baseConfBuilder: Builder

    Permalink

    Base Configuration Builder

    Base Configuration Builder

    Attributes
    protected
    Definition Classes
    NNBase
  6. def baseLayerConfBuilder(listBuilder: ListBuilder): ListBuilder

    Permalink

    Base Configurations for Layers

    Base Configurations for Layers

    Attributes
    protected
    Definition Classes
    MLNN
  7. def clone(): AnyRef

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  8. def conf: MultiLayerConfiguration

    Permalink

    Chain all configurations: Base -> Reg -> Opt -> Layer

    Chain all configurations: Base -> Reg -> Opt -> Layer

    Attributes
    protected
    Definition Classes
    MLNN
  9. def denseLayerConfBuilder(listBuilder: ListBuilder, startIndex: Int, startInSize: Int): ListBuilder

    Permalink
    Attributes
    protected
  10. final def eq(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  11. def equals(arg0: Any): Boolean

    Permalink
    Definition Classes
    AnyRef → Any
  12. def finalize(): Unit

    Permalink
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  13. final def getClass(): Class[_]

    Permalink
    Definition Classes
    AnyRef → Any
  14. def hashCode(): Int

    Permalink
    Definition Classes
    AnyRef → Any
  15. final def isInstanceOf[T0]: Boolean

    Permalink
    Definition Classes
    Any
  16. def layerConfBuilder(confBuilder: Builder): ListBuilder

    Permalink

    Layer Configuration Builder

    Layer Configuration Builder

    Attributes
    protected
    Definition Classes
    FCNN → MLNN
  17. def model: MultiLayerNetwork

    Permalink

    Build the MLNN and Start the Listener (if applicable)

    Build the MLNN and Start the Listener (if applicable)

    returns

    the initiated multi-layer network

    Definition Classes
    MLNN
  18. final def ne(arg0: AnyRef): Boolean

    Permalink
    Definition Classes
    AnyRef
  19. final def notify(): Unit

    Permalink
    Definition Classes
    AnyRef
  20. final def notifyAll(): Unit

    Permalink
    Definition Classes
    AnyRef
  21. def optConfBuilder(confBuilder: Builder): Builder

    Permalink

    Optimization Configuration Builder

    Optimization Configuration Builder

    Attributes
    protected
    Definition Classes
    NNBase
  22. def outputLayerConfBuilder(listBuilder: ListBuilder, index: Int, nIn: Int): ListBuilder

    Permalink
    Attributes
    protected
    Definition Classes
    MLNN
  23. def params(net: MultiLayerNetwork, disp: Boolean = true): (Int, Long)

    Permalink

    Print Parameters

    Print Parameters

    returns

    number of layers & total number of parameters

    Definition Classes
    MLNN
  24. def regConfBuilder(confBuilder: Builder): Builder

    Permalink

    Regularization Configuration Builder

    Regularization Configuration Builder

    Attributes
    protected
    Definition Classes
    NNBase
  25. def startListener(net: MultiLayerNetwork): Unit

    Permalink

    Start Listener

    Start Listener

    Attributes
    protected
    Definition Classes
    MLNN
  26. final def synchronized[T0](arg0: ⇒ T0): T0

    Permalink
    Definition Classes
    AnyRef
  27. def toString(): String

    Permalink
    Definition Classes
    AnyRef → Any
  28. final def wait(): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  29. final def wait(arg0: Long, arg1: Int): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  30. final def wait(arg0: Long): Unit

    Permalink
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Inherited from MLNN

Inherited from NNBase

Inherited from Serializable

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

Ungrouped