@EventDriven @SideEffectFree @SupportsBatching @InputRequirement(value=INPUT_REQUIRED) @Tags(value={"evaluate","extract","Text","Regular Expression","regex"}) @CapabilityDescription(value="Evaluates one or more Regular Expressions against the content of a FlowFile. The results of those Regular Expressions are assigned to FlowFile Attributes. Regular Expressions are entered by adding user-defined properties; the name of the property maps to the Attribute Name into which the result will be placed. The attributes are generated differently based on the enabling of named capture groups. If named capture groups are not enabled: The first capture group, if any found, will be placed into that attribute name. But all capture groups, including the matching string sequence itself, will also be provided at that attribute name with an index value provided, with the exception of a capturing group that is optional and does not match - for example, given the attribute name \"regex\" and expression \"abc(def)?(g)\" we would add an attribute \"regex.1\" with a value of \"def\" if the \"def\" matched. If the \"def\" did not match, no attribute named \"regex.1\" would be added but an attribute named \"regex.2\" with a value of \"g\" will be added regardless. If named capture groups are enabled: Each named capture group, if found, will be placed into the attribute name with the name provided. If enabled, the matching string sequence itself will be placed into the attribute name. If multiple matches are enabled, an index will be applied after the first set of matches. The exception is a capturing group that is optional and does not match. For example, given the attribute name \"regex\" and expression \"abc(?<NAMED>def)?(?<NAMED-TWO>g)\" we would add an attribute \"regex.NAMED\" with the value of \"def\" if the \"def\" matched. We would add an attribute \"regex.NAMED-TWO\" with the value of \"g\" if the \"g\" matched regardless. 
The value of the property must be a valid Regular Expression with one or more capturing groups. If named capture groups are enabled, all capture groups must be named. If they are not, then the processor configuration will fail validation. If the Regular Expression matches more than once, only the first match will be used unless the property enabling repeating capture group is set to true. If any provided Regular Expression matches, the FlowFile(s) will be routed to \'matched\'. If no provided Regular Expression matches, the FlowFile will be routed to \'unmatched\' and no attributes will be applied to the FlowFile.") @DynamicProperty(name="A FlowFile attribute", value="A Regular Expression with one or more capturing groups", description="The first capture group, if any found, will be placed into that attribute name. But all capture groups, including the matching string sequence itself, will also be provided at that attribute name with an index value provided.") public class ExtractText extends AbstractProcessor
| Modifier and Type | Field and Description |
|---|---|
private BlockingQueue<byte[]> |
bufferQueue |
static PropertyDescriptor |
CANON_EQ |
static PropertyDescriptor |
CASE_INSENSITIVE |
static PropertyDescriptor |
CHARACTER_SET |
static PropertyDescriptor |
COMMENTS |
private AtomicReference<Map<String,Pattern>> |
compiledPattersMapRef |
static PropertyDescriptor |
DOTALL |
static PropertyDescriptor |
ENABLE_NAMED_GROUPS |
static PropertyDescriptor |
ENABLE_REPEATING_CAPTURE_GROUP |
static PropertyDescriptor |
INCLUDE_CAPTURE_GROUP_ZERO |
static PropertyDescriptor |
LITERAL |
static PropertyDescriptor |
MAX_BUFFER_SIZE |
static PropertyDescriptor |
MAX_CAPTURE_GROUP_LENGTH |
static PropertyDescriptor |
MULTILINE |
private List<PropertyDescriptor> |
properties |
static Relationship |
REL_MATCH |
static Relationship |
REL_NO_MATCH |
private Set<Relationship> |
relationships |
static PropertyDescriptor |
UNICODE_CASE |
static PropertyDescriptor |
UNICODE_CHARACTER_CLASS |
static PropertyDescriptor |
UNIX_LINES |
| Constructor and Description |
|---|
ExtractText() |
| Modifier and Type | Method and Description |
|---|---|
protected Collection<ValidationResult> |
customValidate(ValidationContext validationContext) |
(package private) int |
getCompileFlags(ProcessContext context) |
Set<Relationship> |
getRelationships() |
protected PropertyDescriptor |
getSupportedDynamicPropertyDescriptor(String propertyDescriptorName) |
protected List<PropertyDescriptor> |
getSupportedPropertyDescriptors() |
protected void |
init(ProcessorInitializationContext context) |
void |
onScheduled(ProcessContext context) |
void |
onStopped() |
void |
onTrigger(ProcessContext context,
ProcessSession session) |
onTrigger
getControllerServiceLookup, getIdentifier, getLogger, getNodeTypeProvider, initialize, isConfigurationRestored, isScheduled, toString, updateConfiguredRestoredTrue, updateScheduledFalse, updateScheduledTrue
equals, getPropertyDescriptor, getPropertyDescriptors, hashCode, onPropertyModified, validate
clone, finalize, getClass, notify, notifyAll, wait, wait, wait
isStateful
getPropertyDescriptor, getPropertyDescriptors, onPropertyModified, validate
public static final PropertyDescriptor CHARACTER_SET
public static final PropertyDescriptor MAX_BUFFER_SIZE
public static final PropertyDescriptor MAX_CAPTURE_GROUP_LENGTH
public static final PropertyDescriptor CANON_EQ
public static final PropertyDescriptor CASE_INSENSITIVE
public static final PropertyDescriptor COMMENTS
public static final PropertyDescriptor DOTALL
public static final PropertyDescriptor LITERAL
public static final PropertyDescriptor MULTILINE
public static final PropertyDescriptor UNICODE_CASE
public static final PropertyDescriptor UNICODE_CHARACTER_CLASS
public static final PropertyDescriptor UNIX_LINES
public static final PropertyDescriptor INCLUDE_CAPTURE_GROUP_ZERO
public static final PropertyDescriptor ENABLE_REPEATING_CAPTURE_GROUP
public static final PropertyDescriptor ENABLE_NAMED_GROUPS
public static final Relationship REL_MATCH
public static final Relationship REL_NO_MATCH
private Set<Relationship> relationships
private List<PropertyDescriptor> properties
private final BlockingQueue<byte[]> bufferQueue
private final AtomicReference<Map<String,Pattern>> compiledPattersMapRef
protected void init(ProcessorInitializationContext context)
init in class AbstractSessionFactoryProcessor
public Set<Relationship> getRelationships()
getRelationships in interface Processor
getRelationships in class AbstractSessionFactoryProcessor
protected List<PropertyDescriptor> getSupportedPropertyDescriptors()
getSupportedPropertyDescriptors in class AbstractConfigurableComponent
protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(String propertyDescriptorName)
getSupportedDynamicPropertyDescriptor in class AbstractConfigurableComponent
protected Collection<ValidationResult> customValidate(ValidationContext validationContext)
customValidate in class AbstractConfigurableComponent
@OnScheduled public final void onScheduled(ProcessContext context) throws IOException
IOException
@OnStopped public void onStopped()
public void onTrigger(ProcessContext context, ProcessSession session)
onTrigger in class AbstractProcessor
int getCompileFlags(ProcessContext context)
Copyright © 2023 Apache NiFi Project. All rights reserved.