@KFStep(name="Sorter", category="Tools", toolTipText="Sort instances in ascending or descending order according to the values of user-specified attributes. Instances can be sorted according to multiple attributes (defined in order). Handles datasets larger than can be fit into main memory via instance connections and specifying the in-memory buffer size. Implements a merge-sort by writing the sorted in-memory buffer to a file when full and then interleaving instances from the disk-based file(s) when the incoming stream has finished.", iconPath="weka/gui/knowledgeflow/icons/Sorter.gif") public class Sorter extends BaseStep
| Modifier and Type | Class and Description |
|---|---|
| `protected static class` | `Sorter.InstanceHolder`<br>Inner class that holds instances and the index of the temp file that holds them (if operating in incremental mode) |
| `protected static class` | `Sorter.SortComparator`<br>Comparator that applies the sort rules to InstanceHolders |
| `static class` | `Sorter.SortRule`<br>Implements a sorting rule based on a single attribute |
| Modifier and Type | Field and Description |
|---|---|
| `protected java.util.List<java.io.File>` | `m_bufferFiles`<br>List of sorted temp files for incremental operation |
| `protected java.lang.String` | `m_bufferSize`<br>Size of the in-memory buffer |
| `protected int` | `m_bufferSizeI`<br>Size of the in-memory buffer after resolving any environment vars |
| `protected Instances` | `m_connectedFormat`<br>format of instances for current incoming connection (if any) |
| `protected java.util.List<Sorter.InstanceHolder>` | `m_incrementalBuffer`<br>In memory buffer for incremental operation |
| `protected boolean` | `m_isReset`<br>True if we've been reset |
| `protected Sorter.SortComparator` | `m_sortComparator`<br>Comparator that applies the sort rules |
| `protected java.lang.String` | `m_sortDetails`<br>Holds the internal textual description of the sort definitions |
| `protected boolean` | `m_streaming`<br>True if processing streaming data |
| `protected Data` | `m_streamingData`<br>To (re)use when streaming |
| `protected java.util.Map<java.lang.String,java.lang.Integer>` | `m_stringAttIndexes`<br>Holds indexes of string attributes, keyed by attribute name |
| `protected java.io.File` | `m_tempDirectory`<br>The directory to hold the temp files - if not set the system tmp directory is used |
m_stepIsResourceIntensive, m_stepManager, m_stepName

| Constructor and Description |
|---|
| `Sorter()` |
| Modifier and Type | Method and Description |
|---|---|
| `protected void` | `emitBufferedInstances()`<br>Output any buffered instances |
| `java.lang.String` | `getBufferSize()`<br>Get the size of the in-memory buffer |
| `java.lang.String` | `getCustomEditorForStep()`<br>Return the fully qualified name of a custom editor component (JComponent) to use for editing the properties of the step. |
| `java.util.List<java.lang.String>` | `getIncomingConnectionTypes()`<br>Get a list of incoming connection types that this step can accept. |
| `java.util.List<java.lang.String>` | `getOutgoingConnectionTypes()`<br>Get a list of outgoing connection types that this step can produce. |
| `java.lang.String` | `getSortDetails()`<br>Get the sort rules to use |
| `java.io.File` | `getTempDirectory()`<br>Get the directory to use for temporary files during incremental operation |
| `protected void` | `init(Instances structure)`<br>Initialize given the supplied instances structure |
| `protected void` | `processBatch(Data data)`<br>Process batch data |
| `void` | `processIncoming(Data data)`<br>Process an incoming data payload (if the step accepts incoming connections) |
| `protected void` | `processIncremental(Data data)`<br>Process incremental data |
| `void` | `setBufferSize(java.lang.String buffSize)`<br>Set the size of the in-memory buffer |
| `void` | `setSortDetails(java.lang.String sortDetails)`<br>Set the sort rules to use |
| `void` | `setTempDirectory(java.io.File tempDir)`<br>Set the directory to use for temporary files during incremental operation |
| `void` | `stepInit()`<br>Initialize the step. |
environmentSubstitute, getDefaultSettings, getInteractiveViewers, getInteractiveViewersImpls, getName, getStepManager, globalInfo, isResourceIntensive, isStopRequested, outputStructureForConnectionType, setName, setStepIsResourceIntensive, setStepManager, setStepMustRunSingleThreaded, start, stepMustRunSingleThreaded, stop

protected transient Sorter.SortComparator m_sortComparator
protected transient java.util.List<Sorter.InstanceHolder> m_incrementalBuffer
protected transient java.util.List<java.io.File> m_bufferFiles
protected java.lang.String m_bufferSize
protected int m_bufferSizeI
protected java.util.Map<java.lang.String,java.lang.Integer> m_stringAttIndexes
protected java.lang.String m_sortDetails
protected java.io.File m_tempDirectory
protected Instances m_connectedFormat
protected boolean m_isReset
protected boolean m_streaming
protected Data m_streamingData
public java.lang.String getBufferSize()
@OptionMetadata(displayName="Size of in-mem streaming buffer", description="Number of instances to sort in memory before writing to a temp file (instance connections only)", displayOrder=1) public void setBufferSize(java.lang.String buffSize)
buffSize - the size of the in-memory buffer

@FilePropertyMetadata(fileChooserDialogType=0, directoriesOnly=true) @OptionMetadata(displayName="Directory for temp files", description="Where to store temporary files when spilling to disk", displayOrder=2) public void setTempDirectory(java.io.File tempDir)
tempDir - the temp dir to use

public java.io.File getTempDirectory()
@ProgrammaticProperty public void setSortDetails(java.lang.String sortDetails)
sortDetails - the sort rules in internal string representation

public java.lang.String getSortDetails()
public void stepInit()
throws WekaException
WekaException - if a problem occurs during initialization

public java.util.List<java.lang.String> getIncomingConnectionTypes()
public java.util.List<java.lang.String> getOutgoingConnectionTypes()
protected void init(Instances structure)
structure - the structure to initialize with

public void processIncoming(Data data) throws WekaException
processIncoming in interface BaseStepExtender
processIncoming in interface Step
processIncoming in class BaseStep
data - the data to process
WekaException - if a problem occurs

protected void processBatch(Data data) throws WekaException
data - the data to process
WekaException - if a problem occurs

protected void processIncremental(Data data) throws WekaException
data - the data to process
WekaException - if a problem occurs

protected void emitBufferedInstances()
throws WekaException
WekaException - if a problem occurs

public java.lang.String getCustomEditorForStep()
getCustomEditorForStep in interface Step
getCustomEditorForStep in class BaseStep