Merged

Changes from 15 commits (28 commits total)
229b8a7 Create data-stream inputs with create command (mjwolf, Aug 13, 2025)
236567e Populate data stream descriptor with proper variable for inputs (mjwolf, Aug 13, 2025)
c2675c7 Render data stream manifests with complete inputs (mjwolf, Aug 13, 2025)
1c8d105 Fix errors (mjwolf, Aug 14, 2025)
cfe4b6a Add additional tests for data_stream_inputs (mjwolf, Aug 14, 2025)
85f2e4c Remove doc_link (mjwolf, Aug 14, 2025)
498cee7 reformat (mjwolf, Aug 14, 2025)
fa005fd Add input documentation (mjwolf, Aug 15, 2025)
e895dd0 Split input definitions into separate yml files (mjwolf, Aug 15, 2025)
69c6854 Update data-stream-inputs to work with separate files (mjwolf, Aug 15, 2025)
f72eeeb Move static documentation content into docs package (mjwolf, Aug 15, 2025)
181d6e7 Automatically render input documentation when building package readme (mjwolf, Aug 18, 2025)
4deb7c4 Do not render input docs in the archetype package (mjwolf, Aug 18, 2025)
4d0e01e Add AzureEventhub to input lists (mjwolf, Aug 18, 2025)
eb5a251 Add descriptions to inputs in create data-stream command (mjwolf, Aug 18, 2025)
d320f9e Use collapsible sections for input docs, remove httpjson (mjwolf, Aug 21, 2025)
5c747e8 Remove embedded httpjson doc (mjwolf, Aug 21, 2025)
ad06043 Merge remote-tracking branch 'upstream/main' into create-datastream-u… (mjwolf, Aug 22, 2025)
86af1ea Merge remote-tracking branch 'upstream/main' into create-datastream-u… (mjwolf, Aug 22, 2025)
fc75add Use embed.FS to load input definitions (mjwolf, Aug 22, 2025)
05acf87 Update package manifest with new data-stream policy (mjwolf, Aug 25, 2025)
ab84d6f move static content in internal/docs dir (vinit-chauhan, Aug 22, 2025)
c66ca35 Insert agent template_path into data stream manifest (mjwolf, Aug 25, 2025)
50f0b72 Remove more unused code (mjwolf, Aug 25, 2025)
d9eaf8e added common agent code. added hbs files for {etw,redis}. (vinit-chauhan, Aug 25, 2025)
7e69dd5 Merge remote-tracking branch 'upstream/main' into create-datastream-u… (mjwolf, Aug 27, 2025)
5c9bea9 Add additional fields to vars in data stream manifest (mjwolf, Aug 28, 2025)
3b6483b Add additional fields to data stream manifest vars (mjwolf, Aug 28, 2025)
94 changes: 94 additions & 0 deletions cmd/create_data_stream.go
@@ -25,6 +25,7 @@ type newDataStreamAnswers struct {
Name string
Title string
Type string
Inputs []string
Subobjects bool
SyntheticAndTimeSeries bool
Synthetic bool
@@ -126,6 +127,83 @@ func createDataStreamCommandAction(cmd *cobra.Command, args []string) error {
}
}

if answers.Type == "logs" {
qs := []*survey.Question{
{
Name: "inputs",
Prompt: &survey.MultiSelect{
Message: "Select input types which will be used in this data stream. See https://www.elastic.co/docs/reference/fleet/elastic-agent-inputs-list for description of the inputs",
Options: []string{
"aws-cloudwatch",

Reviewer:
UX question: do we want to present more descriptive text to the user? That way we can add notes such as "deprecated" or additional context right when the user selects the input, for example:

etw -> Event Tracing for Windows
aws-s3 -> AWS S3 (S3 polling / SQS Notifications)

Contributor Author:
I thought about that too. It would be nice, but I don't think it's possible with the survey module we're using.

It might be nice to switch to another TUI module which can do more in the future, but that'll be a big job. For now I can add a link to the input documentation in the first prompt.

I think that's the best that can be done with this module.

Member:
> It might be nice to switch to another TUI module which can do more in the future, but that'll be a big job. For now I can add a link to the input documentation in the first prompt.

Yep, actually the one we are using now is archived 😕 But we haven't considered yet the effort to migrate to another one.

> I think that's the best that can be done with this module.

MultiSelect accepts a Description function that can be used to give more details about each option. Not sure though how this is displayed.

Contributor Author:
Thanks, I didn't know about Description.

I've added that, and it shows the description next to each option:
[screenshot: Screenshot 2025-08-18 at 11 56 31 AM]

"aws-s3",
"azure-blob-storage",
"azure-eventhub",
"cel",
"entity-analytics",
"etw",
"filestream",
"gcp-pubsub",
"gcs",
"http_endpoint",
"httpjson",
Contributor:
should we drop httpjson since cel is now the preferred?

Contributor Author:
I've taken out httpjson

Contributor:
I still see it when testing locally and in the PR?

"journald",
"netflow",
"redis",
"tcp",
"udp",
"winlog",
Member:
Do you know if there is some source of truth that could allow us to keep this list automatically updated?

Contributor Author:
I don't know of a source of truth that could be used for this. The inputs here were selected because they are the most used. I think the idea of composable inputs could be good to become the source of truth. It would be good if that had some concept of deprecated/discouraged inputs, since there are now some inputs that we don't want new usages of.

},
PageSize: 50,
Description: func(value string, index int) string {
switch value {
case "aws-cloudwatch":
return "AWS Cloudwatch"
case "aws-s3":
return "AWS S3"
case "azure-blob-storage":
return "Azure Blob Storage"
case "azure-eventhub":
return "Azure Eventhub"
case "cel":
return "Common Expression Language (CEL)"
case "entity-analytics":
return "Entity Analytics"
case "etw":
return "Event Tracing for Windows (ETW)"
case "filestream":
return "Filestream"
case "gcp-pubsub":
return "GCP PubSub"
case "gcs":
return "Google Cloud Storage (GCS)"
case "http_endpoint":
return "HTTP Endpoint"
case "httpjson":
return "HTTP JSON"
case "journald":
return "Journald"
case "netflow":
return "Netflow"
case "redis":
return "Redis"
case "tcp":
return "TCP"
case "udp":
return "UDP"
case "winlog":
return "WinLogBeat"
}
return ""
},
},
},
}
err = survey.Ask(qs, &answers)
if err != nil {
return fmt.Errorf("prompt failed: %w", err)
}
}

descriptor := createDataStreamDescriptorFromAnswers(answers, packageRoot)
err = archetype.CreateDataStream(descriptor)
if err != nil {
@@ -163,6 +241,22 @@ func createDataStreamDescriptorFromAnswers(answers newDataStreamAnswers, package
}
}

// If no inputs were selected, insert one so the datastream shows an example of an input.
if answers.Type == "logs" && len(answers.Inputs) == 0 {
answers.Inputs = []string{"tcp"}
}

if len(answers.Inputs) > 0 {
var streams []packages.Stream
for _, input := range answers.Inputs {
streams = append(streams, packages.Stream{
Input: input,
Vars: []packages.Variable{},
})
}
manifest.Streams = streams
}

return archetype.DataStreamDescriptor{
Manifest: manifest,
PackageRoot: packageRoot,
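For reference, a hedged sketch of the streams section this code could produce in the generated data stream manifest. The title, description, and template_path values shown are illustrative assumptions; the actual values come from the archetype templates and later commits in this PR.

# Hypothetical data_stream/<name>/manifest.yml fragment for a "logs" data
# stream created with the aws-cloudwatch and tcp inputs selected.
streams:
  - input: aws-cloudwatch
    title: aws-cloudwatch logs                    # placeholder title
    description: Collect logs via aws-cloudwatch  # placeholder description
    template_path: aws-cloudwatch.yml.hbs
    vars: []                                      # matches the empty Vars slice above
  - input: tcp
    title: tcp logs
    description: Collect logs via tcp
    template_path: tcp.yml.hbs
    vars: []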
22 changes: 22 additions & 0 deletions internal/docs/_static/inputs/aws-cloudwatch.yml
Member:
Is there a plan to keep these docs updated in packages?

@@ -0,0 +1,22 @@
name: aws-cloudwatch
documentation: |-
## Setup

Set up a [CloudWatch Logs](https://docs.aws.amazon.com/bedrock/latest/userguide/model-invocation-logging.html#setup-cloudwatch-logs-destination) destination.

To access aws-cloudwatch, these [specific AWS permissions](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-cloudwatch.html#_aws_permissions) are required.

### Collecting logs from CloudWatch

When log collection from CloudWatch is enabled, you can retrieve logs from
all log streams in a specific log group. The `filterLogEvents` AWS API is used
to list log events from the specified log group. Amazon CloudWatch Logs can be
used to store log files from Amazon Elastic Compute Cloud (EC2), AWS CloudTrail,
Route 53, and other sources.

To collect logs via CloudWatch, select **Collect logs via CloudWatch** and configure the following parameters:

- Access Key ID
- Secret Access Key
- Bucket ARN or Access Point ARN
- Session Token
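As a rough illustration of the settings described above, a minimal Filebeat-style configuration for the aws-cloudwatch input might look like this. This is a sketch only: the field names follow the Filebeat aws-cloudwatch input linked above, and the log group ARN and credentials are placeholders, not values from this PR.

# Hypothetical aws-cloudwatch input configuration (placeholder values).
- type: aws-cloudwatch
  log_group_arn: arn:aws:logs:us-east-1:123456789012:log-group:example-group:*
  start_position: beginning            # read the log group from the start
  access_key_id: ${AWS_ACCESS_KEY_ID}
  secret_access_key: ${AWS_SECRET_ACCESS_KEY}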
35 changes: 35 additions & 0 deletions internal/docs/_static/inputs/aws-s3.yml
@@ -0,0 +1,35 @@
name: aws-s3
documentation: |-
## Setup

Set up an Amazon S3 bucket.
To create an Amazon S3 bucket, follow [these steps](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html).
- You can set the "Bucket List Prefix" parameter as needed.

- **AWS S3 polling mode**: The data source writes data to S3, and Elastic Agent polls the S3 bucket by listing its contents and reading new files.
- **AWS S3 SQS mode**: The data source writes data to S3, S3 sends a notification of a new object to SQS, and Elastic Agent receives the notification from SQS and then reads the S3 object. Multiple agents can be used in this mode.


### Collecting logs from S3 bucket

When log collection from an S3 bucket is enabled, you can access logs from S3 objects referenced by S3 notification events received through an SQS queue or by directly polling the list of S3 objects within the bucket.

The use of SQS notification is preferred: polling the list of S3 objects is expensive in terms of performance and cost, and should be used only when no SQS notification can be attached to the S3 buckets. This input also supports S3 notification from SNS to SQS, or from EventBridge to SQS.

To enable the SQS notification method, set the `queue_url` configuration value. To enable the S3 bucket list polling method, configure both the `bucket_arn` and `number_of_workers` values. Note that `queue_url` and `bucket_arn` cannot be set simultaneously, and at least one of them must be specified. The `number_of_workers` parameter is the primary way to control ingestion throughput for both S3 polling and SQS modes; it determines how many parallel workers process S3 objects simultaneously.

NOTE: To access SQS and S3, these [specific AWS permissions](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-s3.html#_aws_permissions_2) are required.

To collect logs via AWS S3, configure the following parameters:
- Collect logs via S3 Bucket toggled on
- Access Key ID
- Secret Access Key
- Bucket ARN or Access Point ARN
- Session Token

Alternatively, to collect logs via AWS SQS, configure the following parameters:
- Collect logs via S3 Bucket toggled off
- Queue URL
- Secret Access Key
- Access Key ID
- Session Token
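To make the `queue_url` versus `bucket_arn` distinction above concrete, here is a minimal, hedged sketch of a Filebeat-style aws-s3 configuration; the queue URL, bucket ARN, and credentials are placeholders.

# Hypothetical aws-s3 input configuration. queue_url (SQS mode) and
# bucket_arn (polling mode) are mutually exclusive; set exactly one.
- type: aws-s3
  queue_url: https://sqs.us-east-1.amazonaws.com/123456789012/example-queue
  number_of_workers: 5                 # primary throughput control
  access_key_id: ${AWS_ACCESS_KEY_ID}
  secret_access_key: ${AWS_SECRET_ACCESS_KEY}

# Polling mode alternative:
# - type: aws-s3
#   bucket_arn: arn:aws:s3:::example-bucket
#   number_of_workers: 5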
16 changes: 16 additions & 0 deletions internal/docs/_static/inputs/azure-blob-storage.yml
@@ -0,0 +1,16 @@
name: azure-blob-storage
documentation: |-
## Setup
For more details about the Azure Blob Storage input settings, check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-azure-blob-storage.html).

### Collecting logs from Azure Blob Storage

To collect logs via Azure Blob Storage, select **Collect logs via Azure Blob Storage** and configure the following parameters:

- Account Name: Required for authentication and for creating the service and blob clients that are used internally for processing.
- Client ID (OAuth2): Client ID of the Azure account. This is required if **Collect logs using OAuth2 authentication** is enabled.
- Client Secret (OAuth2): Client secret of the Azure account. This is required if **Collect logs using OAuth2 authentication** is enabled.
- Dataset name: Dataset to write data to. Changing the dataset will send the data to a different index. You can't use `-` in the name of a dataset, and only characters that are valid for [Elasticsearch index names](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html) are allowed.
- Containers: This attribute contains the details about a specific container, such as `name`, `number_of_workers`, `poll`, and `poll_interval`. The attribute `name` is specific to a container, while `number_of_workers`, `poll`, and `poll_interval` can be set at the container level or at the global level. If you define these attributes globally, you only need to specify the container name in this YAML config. To override any attribute for a specific container, define it in that container's YAML configuration block within this Containers field; any attribute defined there overrides the global definition. Check the [Azure documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-azure-blob-storage.html#attrib-containers) for further information.

NOTE: For OAuth2, toggle on **Collect logs using OAuth2 authentication**
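A minimal sketch of how the global and container-level attributes described above fit together, assuming shared-key authentication; the account name, key, and container name are placeholders and follow the Filebeat azure-blob-storage input referenced above.

# Hypothetical azure-blob-storage input configuration (placeholder values).
- type: azure-blob-storage
  account_name: examplestorageaccount
  auth.shared_credentials.account_key: ${STORAGE_ACCOUNT_KEY}
  containers:
    - name: example-container
      number_of_workers: 3             # overrides any global value for this container
      poll: true
      poll_interval: 10s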
17 changes: 17 additions & 0 deletions internal/docs/_static/inputs/azure-eventhub.yml
@@ -0,0 +1,17 @@
name: azure-eventhub
documentation: |-
## Setup

- You must have a subscription to Microsoft Azure.
- Elastic Agent must be installed. For more details, check the Elastic Agent [installation instructions](docs-content://reference/fleet/install-elastic-agents.md). You can install only one Elastic Agent per host.
Elastic Agent is required to stream data from the **Azure Event Hub** and ship the data to Elastic, where the events will then be processed via the integration's ingest pipelines.

### Collecting logs from Azure Eventhub

To collect logs via Azure Event Hub, select **Collect logs via Azure Event Hub** and configure the following parameters:

- Eventhub: It is a fully managed, real-time data ingestion service. Elastic recommends using only letters, numbers, and the hyphen (-) character for Event Hub names to maximize compatibility. You can use existing Event Hubs having underscores (_) in the Event Hub name; in this case, the integration will replace underscores with hyphens (-) when it uses the Event Hub name to create dependent Azure resources behind the scenes (e.g., the storage account container to store Event Hub consumer offsets). Elastic also recommends using a separate event hub for each log type as the field mappings of each log type differ.
- Consumer group: The publish/subscribe mechanism of Event Hubs is enabled through consumer groups. A consumer group is a view (state, position, or offset) of an entire event hub. Consumer groups enable multiple consuming applications to each have a separate view of the event stream, and to read the stream independently at their own pace and with their own offsets.
- Connection string: The connection string required to communicate with Event Hubs. For more information, check [these steps](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-get-connection-string).
- Storage account: The name of the storage account where the state/offsets will be stored and updated.
- Storage account key: The storage account key. This key is used to authorize access to data in your storage account.
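The parameters listed above map onto the Filebeat azure-eventhub input roughly as follows; this is a hedged sketch with placeholder values, not configuration taken from this PR.

# Hypothetical azure-eventhub input configuration (placeholder values).
- type: azure-eventhub
  eventhub: example-hub
  consumer_group: $Default
  connection_string: ${EVENTHUB_CONNECTION_STRING}
  storage_account: examplestorage
  storage_account_key: ${STORAGE_ACCOUNT_KEY}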
22 changes: 22 additions & 0 deletions internal/docs/_static/inputs/cel.yml
@@ -0,0 +1,22 @@
name: cel
documentation: |-
## Setup

For more details about the CEL input settings, check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-cel.html).

Before configuring the CEL input, make sure you have:
- Network connectivity to the target API endpoint
- Valid authentication credentials (API keys, tokens, or certificates as required)
- Appropriate permissions to read from the target data source

### Collecting logs from CEL

To configure the CEL input, you must specify the `request.url` value pointing to the API endpoint. The `interval` parameter controls how frequently requests are made and is the primary way to balance data freshness with API rate limits and costs. Authentication is often configured through the `request.headers` section using the appropriate method for the service.

NOTE: To access the API service, make sure you have the necessary API credentials and that the Filebeat instance can reach the endpoint URL. Some services may require IP whitelisting or VPN access.

To collect logs via an API endpoint, configure the following parameters:

- API Endpoint URL
- API credentials (tokens, keys, or username/password)
- Request interval (how often to fetch data)
13 changes: 13 additions & 0 deletions internal/docs/_static/inputs/entity-analytics.yml
@@ -0,0 +1,13 @@
name: entity-analytics
documentation: |-
## Setup

For more details about the Entity Analytics input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-entity-analytics).

### Collecting logs from Entity Analytics

To collect logs via Entity Analytics, select **Collect identities** and configure the following parameters:

- Tenant id
- Client Application id
- Secret API key
11 changes: 11 additions & 0 deletions internal/docs/_static/inputs/etw.yml
@@ -0,0 +1,11 @@
name: etw
documentation: |-
## Setup

For more details about the ETW input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-etw).

### Collecting logs from ETW

[Event Tracing for Windows (ETW)](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-etw.html) is a mechanism that allows real-time logging and capturing of Windows system events. To collect logs from ETW, you can either initiate a new ETW session to gather logs directly from a provider (for example, the DNS Server provider) or read pre-existing logs from an `.etl` (Event Trace Log) file.

To collect logs via ETW, select **Collect analytical events from Windows ETW**.
12 changes: 12 additions & 0 deletions internal/docs/_static/inputs/filestream.yml
@@ -0,0 +1,12 @@
name: filestream
documentation: |-
## Setup

For more details about the Filestream input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-filestream).


### Collecting logs from Filestream

To collect logs via Filestream, select **Collect logs via Filestream** and configure the following parameters:

- Filestream paths: The full path to the related log file.
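For illustration, a minimal Filebeat-style filestream configuration matching the Filestream paths parameter above; the id and paths are placeholders.

# Hypothetical filestream input configuration (placeholder values).
- type: filestream
  id: example-filestream
  paths:
    - /var/log/example/*.log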
17 changes: 17 additions & 0 deletions internal/docs/_static/inputs/gcp-pubsub.yml
@@ -0,0 +1,17 @@
name: gcp-pubsub
documentation: |-
## Setup

For more details about the GCP Pub/Sub input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-gcp-pubsub).

1. [Create a topic for Pub/Sub](https://cloud.google.com/pubsub/docs/create-topic#create_a_topic).
2. [Create a subscription for the topic](https://cloud.google.com/pubsub/docs/create-subscription#create_subscriptions).

### Collecting logs from GCP Pub/Sub

To collect logs via GCP Pub/Sub, select **Collect logs via GCP Pub/Sub** and configure the following parameters:

- Project ID
- Subscription Name
- Topic
- Credentials
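A hedged sketch of how the Project ID, Subscription Name, Topic, and Credentials parameters map onto the Filebeat gcp-pubsub input; all values are placeholders.

# Hypothetical gcp-pubsub input configuration (placeholder values).
- type: gcp-pubsub
  project_id: example-project
  topic: example-topic
  subscription.name: example-subscription
  credentials_file: /path/to/credentials.json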
11 changes: 11 additions & 0 deletions internal/docs/_static/inputs/gcs.yml
@@ -0,0 +1,11 @@
name: gcs
documentation: |-
## Setup
For more details about the GCS input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-gcs).

### Collecting logs from GCS

To collect logs via GCS, select **Collect logs via GCS** and configure the following parameters:

- Buckets: This attribute contains details related to specific buckets, such as `name`, `number_of_workers`, `poll`, `poll_interval`, and `bucket_timeout`. The attribute `name` is specific to a bucket, while `number_of_workers`, `poll`, `poll_interval`, and `bucket_timeout` can be set at the bucket level or at the global level. If you define these attributes globally, you only need to specify the bucket name in this YAML config. To override any attribute for a specific bucket, define it here; any attribute defined in the YAML overrides the global definition. Check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/8.5/filebeat-input-gcs.html#attrib-buckets) for further information.
- Project ID: Required for authentication, for creating storage clients, and for internal logging.
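As a concrete example of the bucket-level and global attributes described above, a minimal gcs input configuration might look like this; the project, credentials path, and bucket name are placeholders.

# Hypothetical gcs input configuration (placeholder values).
- type: gcs
  project_id: example-project
  auth.credentials_file.path: /path/to/credentials.json
  buckets:
    - name: example-logs-bucket
      number_of_workers: 3
      poll: true
      poll_interval: 5m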
12 changes: 12 additions & 0 deletions internal/docs/_static/inputs/http_endpoint.yml
@@ -0,0 +1,12 @@
name: http_endpoint
documentation: |-
## Setup

For more details about the HTTP Endpoint input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-http_endpoint).

### Collecting logs from HTTP Endpoint

To collect logs via HTTP Endpoint, select **Collect logs via HTTP Endpoint** and configure the following parameters:

- Listen Address: Bind address for the HTTP listener. Use 0.0.0.0 to listen on all interfaces.
- Listen port: Bind port for the listener.
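A minimal sketch of the listener settings above for the Filebeat http_endpoint input; the address and port are placeholders.

# Hypothetical http_endpoint input configuration (placeholder values).
- type: http_endpoint
  listen_address: 0.0.0.0
  listen_port: 8080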
11 changes: 11 additions & 0 deletions internal/docs/_static/inputs/httpjson.yml
@@ -0,0 +1,11 @@
name: httpjson
documentation: |-
## Setup

For more details about the HTTP JSON input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-httpjson).

### Collecting logs from HTTP JSON

To collect logs via HTTP JSON, select **Collect logs via API** and configure the following parameter:

- API url: The API URL without the path.
11 changes: 11 additions & 0 deletions internal/docs/_static/inputs/journald.yml
@@ -0,0 +1,11 @@
name: journald
documentation: |-
## Setup
For more details about the Journald input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-journald).


### Collecting logs from Journald

To collect logs via Journald, select **Collect logs via journald** and configure the following parameter:

- Condition: Condition to filter when to apply this input
12 changes: 12 additions & 0 deletions internal/docs/_static/inputs/netflow.yml
@@ -0,0 +1,12 @@
name: netflow
documentation: |-
## Setup

Use the netflow input to read NetFlow and IPFIX exported flows and options records over UDP. For more details about the Netflow input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-netflow).

### Collecting logs from Netflow

To collect logs via Netflow, select **Collect logs via Netflow** and configure the following parameters:

- Host
- Port
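For illustration, the Host and Port parameters above correspond to a single host:port listen address in the Filebeat netflow input; a hedged sketch with placeholder values follows.

# Hypothetical netflow input configuration (placeholder values).
- type: netflow
  host: "0.0.0.0:2055"
  protocols: [v5, v9, ipfix]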
13 changes: 13 additions & 0 deletions internal/docs/_static/inputs/redis.yml
@@ -0,0 +1,13 @@
name: redis
documentation: |-
## Setup

For more details about the Redis input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-redis).

### Collecting logs from Redis

To collect logs via Redis, select **Collect logs via Redis** and configure the following parameters:

- Host(s): The list of Redis hosts to connect to.
- Username
- Password
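A minimal, hedged sketch of the host and credential parameters above for the Filebeat redis input; the host and password are placeholders, and support for a separate username may depend on the stack version.

# Hypothetical redis input configuration (placeholder values).
- type: redis
  hosts: ["localhost:6379"]
  password: ${REDIS_PASSWORD}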