Generate data stream input configuration and documentation #2826
```diff
@@ -25,6 +25,7 @@ type newDataStreamAnswers struct {
 	Name                   string
 	Title                  string
 	Type                   string
+	Inputs                 []string
 	Subobjects             bool
 	SyntheticAndTimeSeries bool
 	Synthetic              bool
@@ -126,6 +127,83 @@ func createDataStreamCommandAction(cmd *cobra.Command, args []string) error {
 		}
 	}
 
+	if answers.Type == "logs" {
+		qs := []*survey.Question{
+			{
+				Name: "inputs",
+				Prompt: &survey.MultiSelect{
+					Message: "Select input types which will be used in this data stream. See https://www.elastic.co/docs/reference/fleet/elastic-agent-inputs-list for description of the inputs",
+					Options: []string{
+						"aws-cloudwatch",
+						"aws-s3",
+						"azure-blob-storage",
+						"azure-eventhub",
+						"cel",
+						"entity-analytics",
+						"etw",
+						"filestream",
+						"gcp-pubsub",
+						"gcs",
+						"http_endpoint",
+						"httpjson",
+						"journald",
+						"netflow",
+						"redis",
+						"tcp",
+						"udp",
+						"winlog",
+					},
+					PageSize: 50,
+					Description: func(value string, index int) string {
+						switch value {
+						case "aws-cloudwatch":
+							return "AWS Cloudwatch"
+						case "aws-s3":
+							return "AWS S3"
+						case "azure-blob-storage":
+							return "Azure Blob Storage"
+						case "azure-eventhub":
+							return "Azure Eventhub"
+						case "cel":
+							return "Common Expression Language (CEL)"
+						case "entity-analytics":
+							return "Entity Analytics"
+						case "etw":
+							return "Event Tracing for Windows (ETW)"
+						case "filestream":
+							return "Filestream"
+						case "gcp-pubsub":
+							return "GCP PubSub"
+						case "gcs":
+							return "Google Cloud Storage (GCS)"
+						case "http_endpoint":
+							return "HTTP Endpoint"
+						case "httpjson":
+							return "HTTP JSON"
+						case "journald":
+							return "Journald"
+						case "netflow":
+							return "Netflow"
+						case "redis":
+							return "Redis"
+						case "tcp":
+							return "TCP"
+						case "udp":
+							return "UDP"
+						case "winlog":
+							return "WinLogBeat"
+						}
+						return ""
+					},
+				},
+			},
+		}
+		err = survey.Ask(qs, &answers)
+		if err != nil {
+			return fmt.Errorf("prompt failed: %w", err)
+		}
+	}
+
 	descriptor := createDataStreamDescriptorFromAnswers(answers, packageRoot)
 	err = archetype.CreateDataStream(descriptor)
 	if err != nil {
@@ -163,6 +241,22 @@ func createDataStreamDescriptorFromAnswers(answers newDataStreamAnswers, package
 		}
 	}
 
+	// If no inputs were selected, insert one so the datastream shows an example of an input.
+	if answers.Type == "logs" && len(answers.Inputs) == 0 {
+		answers.Inputs = []string{"tcp"}
+	}
+
+	if len(answers.Inputs) > 0 {
+		var streams []packages.Stream
+		for _, input := range answers.Inputs {
+			streams = append(streams, packages.Stream{
+				Input: input,
+				Vars:  []packages.Variable{},
+			})
+		}
+		manifest.Streams = streams
+	}
+
 	return archetype.DataStreamDescriptor{
 		Manifest: manifest,
 		PackageRoot: packageRoot,
```
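For context, selecting `tcp` and `udp` in the prompt above would produce a `streams` section in the generated data stream `manifest.yml` along these lines — a minimal sketch, assuming the default YAML serialization of `packages.Stream`, which the code populates with only `input` and `vars`:

```yaml
streams:
  - input: tcp
    vars: []   # variables are left empty for the archetype to fill in
  - input: udp
    vars: []
```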
**Member:** Is there a plan to keep these docs updated in packages?
New file: `aws-cloudwatch` input documentation

```yaml
name: aws-cloudwatch
documentation: |-
  ## Setup

  Set up a [CloudWatch Logs](https://docs.aws.amazon.com/bedrock/latest/userguide/model-invocation-logging.html#setup-cloudwatch-logs-destination) destination.

  To access aws-cloudwatch, these [specific AWS permissions](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-cloudwatch.html#_aws_permissions) are required.

  ### Collecting logs from CloudWatch

  When log collection from CloudWatch is enabled, you can retrieve logs from
  all log streams in a specific log group. The `filterLogEvents` AWS API is used to
  list log events from the specified log group. Amazon CloudWatch Logs can be used
  to store log files from Amazon Elastic Compute Cloud (EC2), AWS CloudTrail,
  Route53, and other sources.

  To collect logs via CloudWatch, select **Collect logs via CloudWatch** and configure the following parameters:

  - Access Key ID
  - Secret Access Key
  - Log Group ARN
  - Session Token
```
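A minimal standalone sketch of these settings, for reference (the ARN is a placeholder; option names follow the Filebeat `aws-cloudwatch` input):

```yaml
filebeat.inputs:
  - type: aws-cloudwatch
    # Placeholder ARN of the log group to read from
    log_group_arn: arn:aws:logs:us-east-1:123456789012:log-group:example-group:*
    start_position: beginning   # read from the start of the group on first run
    scan_frequency: 1m          # how often to check for new log events
    access_key_id: '${AWS_ACCESS_KEY_ID}'
    secret_access_key: '${AWS_SECRET_ACCESS_KEY}'
```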
New file: `aws-s3` input documentation

```yaml
name: aws-s3
documentation: |-
  ## Setup

  Set up an Amazon S3 bucket.
  To create an Amazon S3 bucket, follow [these steps](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html).
  - You can set the "Bucket List Prefix" parameter as needed.

  - **AWS S3 polling mode**: Data is written to S3, and Elastic Agent polls the S3 bucket by listing its contents and reading new files.
  - **AWS S3 SQS mode**: Data is written to S3; S3 sends a notification of a new object to SQS; Elastic Agent receives the notification from SQS and then reads the S3 object. Multiple agents can be used in this mode.

  ### Collecting logs from an S3 bucket

  When log collection from an S3 bucket is enabled, you can access logs from S3 objects referenced by S3 notification events received through an SQS queue, or by directly polling the list of S3 objects within the bucket.

  The use of SQS notification is preferred: polling the list of S3 objects is expensive in terms of performance and costs, and should be used only when no SQS notification can be attached to the S3 buckets. This input also supports S3 notification from SNS to SQS, or from EventBridge to SQS.

  To enable the SQS notification method, set the `queue_url` configuration value. To enable the S3 bucket list polling method, configure both the `bucket_arn` and `number_of_workers` values. Note that `queue_url` and `bucket_arn` cannot be set simultaneously, and at least one of them must be specified. The `number_of_workers` parameter is the primary way to control ingestion throughput for both the S3 polling and SQS modes: it determines how many parallel workers process S3 objects simultaneously.

  NOTE: To access SQS and S3, these [specific AWS permissions](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-aws-s3.html#_aws_permissions_2) are required.

  To collect logs via AWS S3, configure the following parameters:
  - Collect logs via S3 Bucket toggled on
  - Access Key ID
  - Secret Access Key
  - Bucket ARN or Access Point ARN
  - Session Token

  Alternatively, to collect logs via AWS SQS, configure the following parameters:
  - Collect logs via S3 Bucket toggled off
  - Queue URL
  - Secret Access Key
  - Access Key ID
  - Session Token
```
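A sketch of the two modes described above (placeholder values; option names follow the Filebeat `aws-s3` input):

```yaml
filebeat.inputs:
  # SQS notification mode (preferred): set queue_url, not bucket_arn
  - type: aws-s3
    queue_url: https://sqs.us-east-1.amazonaws.com/123456789012/example-queue
    number_of_workers: 5          # parallel workers processing S3 objects

  # S3 polling mode (only when SQS cannot be attached); mutually exclusive with queue_url
  # - type: aws-s3
  #   bucket_arn: arn:aws:s3:::example-bucket
  #   bucket_list_prefix: logs/   # matches the "Bucket List Prefix" parameter
  #   number_of_workers: 5
```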
New file: `azure-blob-storage` input documentation

```yaml
name: azure-blob-storage
documentation: |-
  ## Setup
  For more details about the Azure Blob Storage input settings, check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-azure-blob-storage.html).

  ### Collecting logs from Azure Blob Storage

  To collect logs via Azure Blob Storage, select **Collect logs via Azure Blob Storage** and configure the following parameters:

  - Account Name: Required for authentication and for creating the service and blob clients that are used internally for processing.
  - Client ID (OAuth2): Client ID of the Azure account. Required if **Collect logs using OAuth2 authentication** is enabled.
  - Client Secret (OAuth2): Client secret of the Azure account. Required if **Collect logs using OAuth2 authentication** is enabled.
  - Dataset name: Dataset to write data to. Changing the dataset sends the data to a different index. You can't use `-` in the name of a dataset, and only characters that are valid for [Elasticsearch index names](https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-index_.html) are allowed.
  - Containers: Details about a specific container, such as `name`, `number_of_workers`, `poll`, and `poll_interval`. The attribute `name` is specific to a container, while `number_of_workers`, `poll`, and `poll_interval` can exist both at the container level and at the global level. If you defined these attributes globally, you can specify only the container name here. To override any attribute for a specific container, define it in that container's YAML configuration block within this field; any attribute defined there overrides the global definition. Check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-azure-blob-storage.html#attrib-containers) for further information.

  NOTE: For OAuth2, toggle on **Collect logs using OAuth2 authentication**.
```
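A minimal standalone sketch of these settings (placeholder names; option names follow the Filebeat `azure-blob-storage` input, using shared-key auth rather than OAuth2):

```yaml
filebeat.inputs:
  - type: azure-blob-storage
    account_name: examplestorageaccount
    auth.shared_credentials.account_key: '${ABS_ACCOUNT_KEY}'
    containers:
      - name: example-container
        number_of_workers: 3    # can also be set globally for all containers
        poll: true
        poll_interval: 10s
```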
New file: `azure-eventhub` input documentation

```yaml
name: azure-eventhub
documentation: |-
  ## Setup

  - You must have a subscription to Microsoft Azure.
  - Elastic Agent must be installed. For more details, check the Elastic Agent [installation instructions](docs-content://reference/fleet/install-elastic-agents.md). You can install only one Elastic Agent per host.
  Elastic Agent is required to stream data from the **Azure Event Hub** and ship the data to Elastic, where the events are then processed via the integration's ingest pipelines.

  ### Collecting logs from Azure Event Hub

  To collect logs via Azure Event Hub, select **Collect logs via Azure Event Hub** and configure the following parameters:

  - Event Hub: A fully managed, real-time data ingestion service. Elastic recommends using only letters, numbers, and the hyphen (-) character in Event Hub names to maximize compatibility. You can use existing Event Hubs with underscores (_) in their names; in this case, the integration replaces underscores with hyphens (-) when it uses the Event Hub name to create dependent Azure resources behind the scenes (e.g., the storage account container that stores Event Hub consumer offsets). Elastic also recommends using a separate Event Hub for each log type, as the field mappings of each log type differ.
  - Consumer group: The publish/subscribe mechanism of Event Hubs is enabled through consumer groups. A consumer group is a view (state, position, or offset) of an entire Event Hub. Consumer groups enable multiple consuming applications to each have a separate view of the event stream, and to read the stream independently at their own pace and with their own offsets.
  - Connection string: The connection string required to communicate with Event Hubs. For more information, check [these steps](https://docs.microsoft.com/en-us/azure/event-hubs/event-hubs-get-connection-string).
  - Storage account: The name of the storage account where the state/offsets will be stored and updated.
  - Storage account key: The storage account key. This key is used to authorize access to data in your storage account.
```
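A minimal standalone sketch of these parameters (placeholder values; option names follow the Filebeat `azure-eventhub` input):

```yaml
filebeat.inputs:
  - type: azure-eventhub
    eventhub: example-hub
    consumer_group: $Default                    # the default consumer group
    connection_string: '${EVENTHUB_CONNECTION_STRING}'
    storage_account: examplestorageaccount     # stores consumer state/offsets
    storage_account_key: '${STORAGE_ACCOUNT_KEY}'
```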
New file: `cel` input documentation

```yaml
name: cel
documentation: |-
  ## Setup

  For more details about the CEL input settings, check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-cel.html).

  Before configuring the CEL input, make sure you have:
  - Network connectivity to the target API endpoint
  - Valid authentication credentials (API keys, tokens, or certificates as required)
  - Appropriate permissions to read from the target data source

  ### Collecting logs from CEL

  To configure the CEL input, you must set the `resource.url` value to point at the API endpoint. The `interval` parameter controls how frequently requests are made and is the primary way to balance data freshness against API rate limits and costs. Authentication is typically configured through the input's `auth` settings, using the method appropriate for the service.

  NOTE: To access the API service, make sure you have the necessary API credentials and that the agent can reach the endpoint URL. Some services may require IP allowlisting or VPN access.

  To collect logs via an API endpoint, configure the following parameters:

  - API Endpoint URL
  - API credentials (tokens, keys, or username/password)
  - Request interval (how often to fetch data)
```
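A minimal standalone sketch tying these pieces together (placeholder endpoint and credentials; option names follow the Filebeat `cel` input):

```yaml
filebeat.inputs:
  - type: cel
    interval: 1m                  # how often the program runs and fetches data
    resource.url: https://api.example.com/v1/logs
    auth.basic.user: example-user
    auth.basic.password: '${API_PASSWORD}'
    # A trivial CEL program that emits the response body as a single event
    program: |
      bytes(get(state.url).Body).as(body, {
        "events": [{"message": string(body)}]
      })
```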
New file: `entity-analytics` input documentation

```yaml
name: entity-analytics
documentation: |-
  ## Setup

  For more details about the Entity Analytics input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-entity-analytics).

  ### Collecting logs from Entity Analytics

  To collect logs via Entity Analytics, select **Collect identities** and configure the following parameters:

  - Tenant ID
  - Client Application ID
  - Secret API key
```
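A minimal standalone sketch for the Azure AD provider (placeholder IDs; option names follow the Filebeat `entity-analytics` input, and other providers are assumed to take analogous settings):

```yaml
filebeat.inputs:
  - type: entity-analytics
    provider: azure-ad
    tenant_id: 00000000-0000-0000-0000-000000000000   # placeholder tenant ID
    client_id: 11111111-1111-1111-1111-111111111111   # placeholder application ID
    secret: '${AZURE_CLIENT_SECRET}'
```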
New file: `etw` input documentation

```yaml
name: etw
documentation: |-
  ## Setup

  For more details about the ETW input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-etw).

  ### Collecting logs from ETW

  [Event Tracing for Windows (ETW)](https://www.elastic.co/guide/en/beats/filebeat/current/filebeat-input-etw.html) is a mechanism that allows real-time logging and capturing of Windows system events. To collect logs from ETW, you can either initiate a new ETW session to gather logs directly from a provider (such as the DNS Server provider) or read pre-existing logs from an `.etl` (Event Trace Log) file.

  To collect logs via ETW, select **Collect analytical events from Windows ETW**.
```
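A minimal standalone sketch of an ETW session (the provider name is a placeholder; option names are assumed from the Filebeat `etw` input docs):

```yaml
filebeat.inputs:
  - type: etw
    provider.name: Microsoft-Windows-DNSServer   # start a new session for this provider
    session_name: DNSServer-Analytical
    trace_level: verbose
    # Alternatively, read a pre-existing trace file instead of starting a session:
    # file: 'C:\Windows\System32\Winevt\Logs\example.etl'
```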
New file: `filestream` input documentation

```yaml
name: filestream
documentation: |-
  ## Setup

  For more details about the Filestream input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-filestream).

  ### Collecting logs from Filestream

  To collect logs via Filestream, select **Collect logs via Filestream** and configure the following parameter:

  - Filestream paths: The full path to the related log file.
```
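A minimal standalone sketch (the path is a placeholder; option names follow the Filebeat `filestream` input):

```yaml
filebeat.inputs:
  - type: filestream
    id: example-logs            # a unique id is required for filestream inputs
    paths:
      - /var/log/example/*.log  # full path(s) to the related log files
```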
New file: `gcp-pubsub` input documentation

```yaml
name: gcp-pubsub
documentation: |-
  ## Setup

  For more details about the GCP Pub/Sub input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-gcp-pubsub).

  1. [Create a topic for Pub/Sub](https://cloud.google.com/pubsub/docs/create-topic#create_a_topic).
  2. [Create a subscription for the topic](https://cloud.google.com/pubsub/docs/create-subscription#create_subscriptions).

  ### Collecting logs from GCP Pub/Sub

  To collect logs via GCP Pub/Sub, select **Collect logs via GCP Pub/Sub** and configure the following parameters:

  - Project ID
  - Subscription Name
  - Topic
  - Credentials
```
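A minimal standalone sketch of these parameters (placeholder names; option names follow the Filebeat `gcp-pubsub` input):

```yaml
filebeat.inputs:
  - type: gcp-pubsub
    project_id: example-project
    topic: example-topic
    subscription.name: example-subscription
    credentials_file: /path/to/service-account.json   # service account credentials
```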
New file: `gcs` input documentation

```yaml
name: gcs
documentation: |-
  ## Setup
  For more details about the GCS input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-gcs).

  ### Collecting logs from GCS

  To collect logs via GCS, select **Collect logs via GCS** and configure the following parameters:

  - Buckets: Details about specific buckets, such as `name`, `number_of_workers`, `poll`, `poll_interval`, and `bucket_timeout`. The attribute `name` is specific to a bucket, while `number_of_workers`, `poll`, `poll_interval`, and `bucket_timeout` can exist both at the bucket level and at the global level. If you defined these attributes globally, you can specify only the bucket name here. To override any attribute for a specific bucket, define it here; any attribute defined in the YAML overrides the global definitions. Check the [Filebeat documentation](https://www.elastic.co/guide/en/beats/filebeat/8.5/filebeat-input-gcs.html#attrib-buckets) for further information.
  - Project ID: Required for authentication and for creating the storage clients and logging used internally for processing.
```
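A minimal standalone sketch of these settings (placeholder names; option names follow the Filebeat `gcs` input):

```yaml
filebeat.inputs:
  - type: gcs
    project_id: example-project
    auth.credentials_file.path: /path/to/service-account.json
    buckets:
      - name: example-bucket
        number_of_workers: 3    # can also be set globally for all buckets
        poll: true
        poll_interval: 15s
```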
New file: `http_endpoint` input documentation

```yaml
name: http_endpoint
documentation: |-
  ## Setup

  For more details about the HTTP Endpoint input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-http_endpoint).

  ### Collecting logs from an HTTP Endpoint

  To collect logs via an HTTP endpoint, select **Collect logs via HTTP Endpoint** and configure the following parameters:

  - Listen Address: Bind address for the HTTP listener. Use 0.0.0.0 to listen on all interfaces.
  - Listen Port: Bind port for the listener.
```
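A minimal standalone sketch of the listener (the port is a placeholder; option names follow the Filebeat `http_endpoint` input):

```yaml
filebeat.inputs:
  - type: http_endpoint
    listen_address: 0.0.0.0   # listen on all interfaces
    listen_port: 8080         # placeholder port for incoming requests
```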
New file: `httpjson` input documentation

```yaml
name: httpjson
documentation: |-
  ## Setup

  For more details about the HTTP JSON input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-httpjson).

  ### Collecting logs from HTTP JSON

  To collect logs via HTTP JSON, select **Collect logs via API** and configure the following parameter:

  - API URL: The API URL without the path.
```
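A minimal standalone sketch (placeholder URL; option names follow the Filebeat `httpjson` input):

```yaml
filebeat.inputs:
  - type: httpjson
    request.url: https://api.example.com/v1/events   # placeholder API endpoint
    request.interval: 1m                             # how often to fetch data
```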
New file: `journald` input documentation

```yaml
name: journald
documentation: |-
  ## Setup
  For more details about the Journald input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-journald).

  ### Collecting logs from Journald

  To collect logs via Journald, select **Collect logs via journald** and configure the following parameter:

  - Condition: Condition to filter when to apply this input.
```
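Note that the Condition parameter above controls when the input applies at the policy level; journal-level filtering in a standalone Filebeat sketch would instead use `include_matches` (unit name is a placeholder):

```yaml
filebeat.inputs:
  - type: journald
    id: example-journald
    # Only collect journal entries matching these fields
    include_matches.match:
      - _SYSTEMD_UNIT=example.service
```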
New file: `netflow` input documentation

```yaml
name: netflow
documentation: |-
  ## Setup

  Use the netflow input to read NetFlow and IPFIX exported flows and options records over UDP. For more details about the NetFlow input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-netflow).

  ### Collecting logs from NetFlow

  To collect logs via NetFlow, select **Collect logs via NetFlow** and configure the following parameters:

  - Host
  - Port
```
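A minimal standalone sketch of the UDP listener (port and protocol list are placeholders; option names follow the Filebeat `netflow` input):

```yaml
filebeat.inputs:
  - type: netflow
    host: '0.0.0.0:2055'        # bind address and port for flow exporters
    protocols: [v5, v9, ipfix]  # NetFlow/IPFIX versions to accept
```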
New file: `redis` input documentation

```yaml
name: redis
documentation: |-
  ## Setup

  For more details about the Redis input settings, check the [Filebeat documentation](https://www.elastic.co/docs/reference/beats/filebeat/filebeat-input-redis).

  ### Collecting logs from Redis

  To collect logs via Redis, select **Collect logs via Redis** and configure the following parameters:

  - Host(s): The list of Redis hosts to connect to.
  - Username
  - Password
```
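A minimal standalone sketch of these parameters (placeholder host; option names follow the Filebeat `redis` input):

```yaml
filebeat.inputs:
  - type: redis
    hosts: ["localhost:6379"]       # list of Redis hosts to connect to
    password: '${REDIS_PASSWORD}'
```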
