package models
- Alphabetic
- Public
- All
Type Members
- sealed trait BatchETL extends AnyRef
- case class BatchETLModel(name: String, inputs: List[ReaderModel], output: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], kafkaAccessType: String, group: String = "default", isActive: Boolean = false) extends BatchETL with Product with Serializable
- case class BatchGdprETLModel(name: String, dataStores: List[DataStoreConf], strategyConfig: String, inputs: List[ReaderModel], output: WriterModel, group: String = "default", isActive: Boolean = false) extends BatchETL with Product with Serializable
- case class BatchJobExclusionConfig(isFullyExclusive: Boolean, restConfigExclusiveParams: Seq[String]) extends Product with Serializable
- case class BatchJobInstanceModel(name: String, instanceOf: String, startTimestamp: Long, currentStatusTimestamp: Long, status: JobStatus, restConfig: Config = ConfigFactory.empty, error: Option[String] = None) extends Model with Product with Serializable
- trait BatchJobJsonSupport extends DefaultJsonProtocol
- case class BatchJobModel(name: String, description: String, owner: String, system: Boolean, creationTime: Long, etl: BatchETL, exclusivityConfig: BatchJobExclusionConfig = ...) extends Model with Product with Serializable
- case class BatchSchedulerModel(name: String, cronExpression: String, batchJob: Option[String], options: Option[BsonDocument] = None, isActive: Boolean = true) extends Model with Product with Serializable
-
case class
CdcModel(name: String, uri: String, schema: String, options: CdcOptions = CdcOptions.default) extends DatastoreModel with Product with Serializable
A named model for mutations coming from a CDC tool.
A named model for mutations coming from a CDC tool. This model should be used together with the Cdc writer plugin in order to write these mutations into a Delta Lake table on HDFS.
`uri` is the location on HDFS where the Delta Table will be created. `schema` is a json-encoded DataFrame schema, that is, a StructType. See DataType.fromJson and DataType.json. `options` control the underlying spark DeltaLakeWriter in the writers using an instance of this model.
- name
the name of the datastore
- uri
the uri where the data are meant to be written
- schema
the schema of the data
- options
the options for the datastore
-
case class
CdcOptions(saveMode: String, format: String = "delta", extraOptions: Option[Map[String, String]] = None, partitionBy: Option[List[String]] = None) extends Product with Serializable
Options for a CdcModel:
Options for a CdcModel:
`saveMode` specifies the behaviour when saving and the output uri already exists; valid values are:
- "error", throw an error and do not save anything
- "overwrite", overwrite existing data
- "append", append to existing data
- "ignore", do not save anything and don't throw any errors
- "default", like "error" for it.agilelab.bigdata.wasp.consumers.SparkWriter, like "append" for it.agilelab.bigdata.wasp.consumers.SparkStreamingWriter
`format` specifies the data format to use; valid values are:
- "delta" (this is the default)
- "parquet"
- "orc"
- "json"
- any format accepted by the available Spark DataFrameWriters
`extraOptions` allows specifying any writer-specific options accepted by DataFrameReader/Writer.option. `partitionBy` allows specifying columns to be used to partition the data by using different directories for different values.
- saveMode
specifies the behaviour when the output uri exists
- format
specifies the format to use
- extraOptions
extra options for the underlying writer
- case class CompletionModel(toComplete: String, info: String) extends Product with Serializable
- case class ContainsRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable
- case class CountEntry(timestamp: Instant, count: Map[String, Int]) extends Product with Serializable
- case class Counts(logs: Seq[CountEntry], telemetry: Seq[CountEntry], events: Seq[CountEntry]) extends Product with Serializable
- case class DashboardModel(url: String, needsFilterBox: Boolean) extends Product with Serializable
- sealed trait DataStoreConf extends AnyRef
- trait DataStoreConfJsonSupport extends DefaultJsonProtocol
-
abstract
class
DatastoreModel extends Model
Base datastore model.
- case class DocumentModel(name: String, connectionString: String, schema: String) extends DatastoreModel with Product with Serializable
- case class ErrorModel(fileName: String, where: String, errorType: String, msg: String, content: String, indicator: String) extends Product with Serializable
- case class EventEntry(eventType: String, eventId: String, severity: String, payload: String, timestamp: Instant, source: String, sourceId: String, eventRuleName: String) extends Product with Serializable
- case class Events(found: Long, entries: Seq[EventEntry]) extends Product with Serializable
- final case class ExactKeyValueMatchingStrategy() extends KeyValueMatchingStrategy with Product with Serializable
- case class ExactRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable
- case class FreeCode(code: String) extends Product with Serializable
- case class FreeCodeModel(name: String, code: String) extends Model with Product with Serializable
- case class GdprStrategyModel(className: String, dataStoresConf: List[DataStoreConf], configuration: Option[String] = None) extends Product with Serializable
- case class GenericModel(name: String, value: BsonDocument, product: GenericProduct, options: GenericOptions = GenericOptions.default) extends DatastoreModel with Product with Serializable
- case class GenericOptions(options: Option[Map[String, String]] = None) extends Product with Serializable
- sealed abstract class HttpCompression extends AnyRef
-
case class
HttpModel(name: String, url: String, method: String, headersFieldName: Option[String], valueFieldsNames: List[String], compression: HttpCompression, mediaType: String, logBody: Boolean, structured: Boolean = true) extends DatastoreModel with Product with Serializable
The HttpModel used by HttpWriter to send data with HTTP protocol
The HttpModel used by HttpWriter to send data with HTTP protocol
- name
The httpModel name
- url
The url to send the request to
- method
The HTTP methods: GET, POST, PUT, PATCH, DELETE
- headersFieldName
The name of the DataFrame column to be used as http headers, it must be of type Map[String,String], if None, no header will be sent in the request, except for the content-type and content-encoding ones
- valueFieldsNames
The list of DataFrame columns to be rendered as json in the http request body. If the passed list is empty, all the fields, except the headers field (if any) will be rendered as a json object. If there is only one field, the behaviour is controlled by the structured field
- compression
The HttpCompression
- mediaType
The format of the request content
- logBody
It enables the request body logger
- structured
Indicates how the request body will be rendered. This configuration has effect only if the DataFrame contains only one column to be sent and only if that column is of ArrayType or MapType. If structured is true the array or map will always be enclosed in a json object, otherwise the map or the array will be at the top level of the json document. Input dataframe:
+---------+ | values | +---------+ |[3, 4, 5]| +---------+
Request with structured = true
{"values" : [3, 4, 5]}
Request with structured = false
[3, 4, 5]
- case class IndexModel(name: String, creationTime: Long, schema: Option[String], query: Option[String] = None, numShards: Option[Int] = Some(1), replicationFactor: Option[Int] = Some(1), rollingIndex: Boolean = true, idField: Option[String] = None, options: Map[String, String] = Map.empty) extends DatastoreModel with Product with Serializable
-
class
IndexModelBuilder[Stage <: Stage, Kind <: DataStoreKind] extends AnyRef
A builder able to create instances of IndexModel.
A builder able to create instances of IndexModel.
- Stage
The current Stage of the builder.
- Kind
The kind of DataStore whose index is being built.
- final case class KeyValueDataStoreConf(inputKeyColumn: String, correlationIdColumn: String, keyValueModel: KeyValueModel, keyValueMatchingStrategy: KeyValueMatchingStrategy) extends DataStoreConf with Product with Serializable
- sealed trait KeyValueMatchingStrategy extends AnyRef
- case class KeyValueModel(name: String, tableCatalog: String, dataFrameSchema: Option[String], options: Option[Seq[KeyValueOption]], useAvroSchemaManager: Boolean, avroSchemas: Option[Map[String, String]]) extends DatastoreModel with Product with Serializable
- case class KeyValueOption(key: String, value: String) extends Product with Serializable
- case class LogEntry(log_source: String, log_level: String, message: String, timestamp: Instant, thread: String, cause: Option[String] = None, stacktrace: Option[String] = None) extends Product with Serializable
- case class Logs(found: Long, entries: Seq[LogEntry]) extends Product with Serializable
- trait Metadata extends AnyRef
- case class MetadataModel(id: String, sourceId: String, arrivalTimestamp: Long, lastSeenTimestamp: Long, path: Array[PathModel]) extends Product with Serializable
- case class MetricEntry(source: SourceEntry, name: String) extends Product with Serializable
- case class Metrics(found: Long, entries: Seq[MetricEntry]) extends Product with Serializable
- case class MlModelOnlyInfo(name: String, version: String, className: Option[String] = None, timestamp: Option[Long] = None, modelFileId: Option[BsonObjectId] = None, favorite: Boolean = false, description: String = "") extends Model with Product with Serializable
- trait Model extends AnyRef
-
case class
MultiTopicModel extends DatastoreModel with Product with Serializable
A model for grouping of topics.
A model for grouping of topics.
The `name` field specifies the name of the model, which is used as the unique identifier for the model in the models database.
The `topicNameField` field specifies the field whose contents will be used as the name of the topic to which the message will be sent when writing to Kafka. The field must be of type string. The original field will be left as-is, so your schema must handle it (or you can use `valueFieldsNames`).
The `topicModelNames` field contains the names of the topic models that constitute this grouping of topics.
The topic models that constitute this grouping of topics must: - consist of at least one topic model - be all different models - refer to different topics - use the same settings for everything but partitions and replicas
- case class NoPartitionPruningStrategy() extends PartitionPruningStrategy with Product with Serializable
- sealed trait PartitionPruningStrategy extends AnyRef
- case class PathModel(name: String, ts: Long) extends Product with Serializable
- final case class PipegraphInstanceModel(name: String, instanceOf: String, startTimestamp: Long, currentStatusTimestamp: Long, status: PipegraphStatus, executedByNode: Option[String], peerActor: Option[String], error: Option[String] = None) extends Model with Product with Serializable
-
case class
PipegraphModel(name: String, description: String, owner: String, isSystem: Boolean, creationTime: Long, structuredStreamingComponents: List[StructuredStreamingETLModel], dashboard: Option[DashboardModel] = None, labels: Set[String] = Set.empty, enrichmentSources: RestEnrichmentConfigModel = ...) extends Model with Product with Serializable
A model for a pipegraph, a processing pipeline abstraction.
A model for a pipegraph, a processing pipeline abstraction.
- name
name of the pipegraph
- description
description of the pipegraph
- owner
owner of the pipegraph
- isSystem
whether the pipegraph is from the WASP system
- creationTime
time of creation of the pipegraph
- structuredStreamingComponents
components describing processing built on Spark Structured Streaming
- dashboard
dashboard of the pipegraph
- final case class PrefixAndTimeBoundKeyValueMatchingStrategy(separator: String, pattern: String, locale: String = "UTC") extends KeyValueMatchingStrategy with Product with Serializable
- final case class PrefixKeyValueMatchingStrategy() extends KeyValueMatchingStrategy with Product with Serializable
- case class PrefixRawMatchingStrategy(dataframeKeyMatchingExpression: String) extends RawMatchingStrategy with Product with Serializable
- case class ProcessGroupModel(name: String, content: BsonDocument, errorPort: String) extends Model with Product with Serializable
- trait ProcessingComponentModel extends AnyRef
-
case class
ProducerModel(name: String, className: String, topicName: Option[String], isActive: Boolean = false, configuration: Option[String] = None, isRemote: Boolean, isSystem: Boolean) extends Model with Product with Serializable
DataSource class.
DataSource class. The fields must be the same as the ones inside the MongoDB document associated with this model.
- case class RTModel(name: String, inputs: List[ReaderModel], isActive: Boolean = false, strategy: Option[StrategyModel] = None, endpoint: Option[WriterModel] = None) extends ProcessingComponentModel with Product with Serializable
- final case class RawDataStoreConf(inputKeyColumn: String, correlationIdColumn: String, rawModel: RawModel, rawMatchingStrategy: RawMatchingStrategy, partitionPruningStrategy: PartitionPruningStrategy, missingPathFailure: Boolean = false) extends DataStoreConf with Product with Serializable
- sealed trait RawMatchingStrategy extends AnyRef
-
case class
RawModel(name: String, uri: String, timed: Boolean = false, schema: String, options: RawOptions = RawOptions.default) extends DatastoreModel with Product with Serializable
A named model for data stored as files on a raw datastore (eg HDFS).
A named model for data stored as files on a raw datastore (eg HDFS).
The `uri` is augmented with time information if `timed` is true. For writers this means whether to use `uri` as-is or create timed namespaces (eg for HDFS, a subdirectory) inside it; for readers, whether to read from `uri` as-is or from the most recent timed namespace inside it. `schema` is a json-encoded DataFrame schema, that is, a StructType. See DataType.fromJson and DataType.json. `options` control the underlying spark DataFrameWriter/Reader in the writers/readers using an instance of this model.
- name
the name of the datastore
- uri
the uri where the data files reside
- timed
whether the uri must be augmented with time information
- schema
the schema of the data
- options
the options for the datastore
-
case class
RawOptions(saveMode: String, format: String, extraOptions: Option[Map[String, String]] = None, partitionBy: Option[List[String]] = None) extends Product with Serializable
Options for a raw datastore.
Options for a raw datastore.
`saveMode` specifies the behaviour when saving and the output uri already exists; valid values are:
- "error", throw an error and do not save anything
- "overwrite", overwrite existing data
- "append", append to existing data
- "ignore", do not save anything and don't throw any errors
- "default", like "error" for it.agilelab.bigdata.wasp.consumers.SparkWriter, like "append" for it.agilelab.bigdata.wasp.consumers.SparkStreamingWriter
`format` specifies the data format to use; valid values are:
- "parquet" (this is the default)
- "orc"
- "json"
- any format accepted by the available Spark DataFrameWriters
`extraOptions` allows specifying any writer-specific options accepted by DataFrameReader/Writer.option. `partitionBy` allows specifying columns to be used to partition the data by using different directories for different values.
- saveMode
specifies the behaviour when the output uri exists
- format
specifies the format to use
- extraOptions
extra options for the underlying writer
-
case class
ReaderModel extends Product with Serializable
A model for a reader, composed by a name, a datastoreModelName defining the datastore, a datastoreProduct defining the datastore software product to use, and any additional options needed to configure the reader.
- case class SourceEntry(name: String) extends Product with Serializable
- case class Sources(found: Long, entries: Seq[SourceEntry]) extends Product with Serializable
-
case class
SqlSourceModel(name: String, connectionName: String, dbtable: String, partitioningInfo: Option[JdbcPartitioningInfo], numPartitions: Option[Int], fetchSize: Option[Int]) extends DatastoreModel with Product with Serializable
Class representing a SqlSource model
Class representing a SqlSource model
- name
The name of the SqlSource model
- connectionName
The name of the connection to use. N.B. it has to be present in jdbc-subConfig
- dbtable
The name of the table
- partitioningInfo
optional - Partition info (column, lowerBound, upperBound)
- numPartitions
optional - Number of partitions
- fetchSize
optional - Fetch size
- case class StrategyModel(className: String, configuration: Option[String] = None) extends Product with Serializable
- case class StreamingReaderModel extends Product with Serializable
-
case class
StructuredStreamingETLModel(name: String, group: String = "default", streamingInput: StreamingReaderModel, staticInputs: List[ReaderModel], streamingOutput: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], triggerIntervalMs: Option[Long], options: Map[String, String] = Map.empty) extends ProcessingComponentModel with Product with Serializable
A streaming processing component that leverages Spark's Structured Streaming API.
A streaming processing component that leverages Spark's Structured Streaming API.
- name
unique name of the processing component
- group
group of which the processing component is part
- staticInputs
list of inputs for static datasets
- streamingOutput
streaming output
- mlModels
machine learning models to be used in the processing
- strategy
strategy model that defines the processing
- triggerIntervalMs
trigger interval to use, in milliseconds
- options
has no effect at all
- sealed trait SubjectStrategy extends AnyRef
- case class TelemetryEntry(source: SourceEntry, metric: MetricEntry, messageId: String, value: Long, timestamp: Instant) extends Product with Serializable
- case class TelemetryPoint(timestamp: Instant, value: Double) extends Product with Serializable
- case class TelemetrySeries(source: SourceEntry, metric: MetricEntry, series: Seq[TelemetryPoint]) extends Product with Serializable
- final case class TimeBasedBetweenPartitionPruningStrategy(columnName: String, isDateNumeric: Boolean, pattern: String, granularity: String) extends PartitionPruningStrategy with Product with Serializable
- sealed abstract class TopicCompression extends AnyRef
-
case class
TopicModel(name: String, creationTime: Long, partitions: Int, replicas: Int, topicDataType: String, keyFieldName: Option[String], headersFieldName: Option[String], valueFieldsNames: Option[Seq[String]], useAvroSchemaManager: Boolean, schema: BsonDocument, topicCompression: TopicCompression = TopicCompression.Disabled, subjectStrategy: SubjectStrategy = SubjectStrategy.None, keySchema: Option[String] = None, clusterAlias: Option[String] = None, schemaId: Option[Long] = None) extends DatastoreModel with Product with Serializable
A model for a topic, that is, a message queue of some sort.
A model for a topic, that is, a message queue of some sort. Right now this means just Kafka topics.
- name
the name of the topic, and doubles as the unique identifier for the model in the models database
- creationTime
marks the time at which the model was generated.
- partitions
the number of partitions used for the topic when wasp creates it
- replicas
the number of replicas used for the topic when wasp creates it
- topicDataType
field specifies the format to use when encoding/decoding data to/from messages, allowed values are: avro, plaintext, json, binary
- keyFieldName
optionally specifies a field whose contents will be used as a message key when writing to Kafka. The field must be of type string or binary. The original field will be left as-is, so your schema must handle it (or you can use `valueFieldsNames`).
- headersFieldName
allows you to optionally specify a field whose contents will be used as message headers when writing to Kafka. The field must contain an array of non-null objects which must have a non-null field
`headerKey` of type string and a field `headerValue` of type binary. The original field will be left as-is, so your schema must handle it (or you can use `valueFieldsNames`).
- valueFieldsNames
allows you to specify a list of field names to be used to filter the fields that get passed to the value encoding; with this you can filter out fields that you don't need in the value, obviating the need to handle them in the schema. This is especially useful when specifying the
`keyFieldName` or `headersFieldName`. For the avro and json topic data types this is optional; for the plaintext and binary topic data types this field is mandatory and the list must contain a single value field name that has the proper type (string for plaintext and binary for binary).
- useAvroSchemaManager
if a schema registry should be used or not to handle the schema evolution (it makes sense only for avro message datatype)
- schema
the Avro schema to use when encoding the value, for plaintext and binary this field is ignored. For json and avro the field names need to match 1:1 with the valueFieldsNames or the schema output of the strategy
- topicCompression
to use to compress messages
- subjectStrategy
subject strategy to use when registering the schema to the schema registry for the schema registry implementations that support it. This property makes sense only for avro and only if useAvroSchemaManager is set to true
- keySchema
the schema to be used to encode the key as avro
- case class WebMailModel(name: String) extends DatastoreModel with Product with Serializable
- case class WebsocketModel(name: String, host: String, port: String, resourceName: String, options: Option[BsonDocument] = None) extends DatastoreModel with Product with Serializable
-
case class
WriterModel extends Product with Serializable
A model for a writer, composed by a name, a datastoreModelName defining the datastore, a datastoreProduct defining the datastore software product to use, and any additional options needed to configure the writer.
-
case class
LegacyStreamingETLModel(name: String, inputs: List[ReaderModel], output: WriterModel, mlModels: List[MlModelOnlyInfo], strategy: Option[StrategyModel], kafkaAccessType: String, group: String = "default", isActive: Boolean = false) extends ProcessingComponentModel with Product with Serializable
- Annotations
- @deprecated
- Deprecated
(Since version 2.8.0)
Value Members
- object Aggregate extends Enumeration
- object BatchETLModel extends Serializable
- object BatchGdprETLModel extends Serializable
- object CdcOptions extends Serializable
- object ContainsRawMatchingStrategy extends Serializable
- object ExactKeyValueMatchingStrategy extends Serializable
- object ExactRawMatchingStrategy extends Serializable
- object GdprStrategyModel extends Serializable
-
object
GenericCdcMutationFields
Object that represents all the fields used to describe a generic mutation inside the cdcPlugin. This object has been placed here because all the cdc adapters (like debezium, goldengate, etc.) need to know how to map the fields into a compliant dataframe.
- object GenericOptions extends Serializable
- object HttpCompression
-
object
IndexModelBuilder
Companion object of IndexModelBuilder, contains the syntax.
Companion object of IndexModelBuilder, contains the syntax.
import IndexModelBuilder._ when you want to construct an IndexModel.
- object IndexType extends Enumeration
- object JobStatus extends Enumeration
- object KeyValueDataStoreConf extends Serializable
- object KeyValueModel extends Serializable
- object LegacyStreamingETLModel extends Serializable
- object MultiTopicModel extends Serializable
- object NoPartitionPruningStrategy extends Serializable
- object PipegraphStatus extends Enumeration
- object PrefixAndTimeBoundKeyValueMatchingStrategy extends Serializable
- object PrefixKeyValueMatchingStrategy extends Serializable
- object PrefixRawMatchingStrategy extends Serializable
- object RawDataStoreConf extends Serializable
- object RawModel extends Serializable
- object RawOptions extends Serializable
- object ReaderModel extends Serializable
- object SpraySolrProtocol extends DefaultJsonProtocol
- object StrategyModel extends Serializable
- object StreamingReaderModel extends Serializable
- object StructuredStreamingETLModel extends Serializable
- object SubjectStrategy
- object TimeBasedBetweenPartitionPruningStrategy extends Serializable
- object TopicCompression
- object TopicDataTypes
- object TopicModel extends Serializable
- object WriterModel extends Serializable