Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions packages/aws_lambda_otel/changelog.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# newer versions go on top
- version: "0.8.0"
changes:
- description: Add ML anomaly detection modules for Lambda function performance (duration, concurrency) and errors/throttles.
type: enhancement
link: https://github.com/elastic/integrations/pull/19923
- version: "0.7.0"
changes:
- description: Improve ESQL queries in dashboards
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
{
"id": "aws_lambda_otel-activity-ml",
"type": "ml-module",
"migrationVersion": {
"search": "7.9.3"
},
"references": [],
"attributes": {
"id": "aws_lambda_otel-activity-ml",
"title": "AWS Lambda function errors and throttles (OpenTelemetry)",
"description": "Detect anomalous rates of Lambda errors, throttles, and dead-letter failures (the per-bucket Sum statistic) streamed from CloudWatch via the OpenTelemetry firehose receiver. Each function is modelled against its own history, so an elevation in error or throttle rate that is abnormal for that function is caught even when it stays below the fixed error-rate alert thresholds.",
"type": "AWS metrics",
"logo": {
"icon": "logoAWSMono"
},
"defaultIndexPattern": "metrics-aws.lambda.otel-*",
"query": {
"bool": {
"filter": [
{
"term": {
"Namespace": "AWS/Lambda"
}
},
{
"exists": {
"field": "FunctionName"
}
}
],
"must_not": {
"terms": {
"_tier": [
"data_frozen",
"data_cold"
]
}
}
}
},
"jobs": [
{
"id": "aws_lambda_function_error_anomaly",
"config": {
"groups": [
"aws",
"lambda",
"otel"
],
"description": "AWS Lambda: detect functions producing unusual rates of errors, throttles, or dead-letter failures relative to that function's own history. The threshold-based alert rules cover hard error-rate and throttle-rate breaches; this job catches per-function rate elevations that drift below any fixed threshold, with the function, region, and account surfaced as influencers for attribution.",
"analysis_config": {
"bucket_span": "15m",
"summary_count_field_name": "doc_count",
"detectors": [
{
"detector_description": "Anomalous error rate",
"function": "high_mean",
"field_name": "metrics.amazonaws.com/AWS/Lambda/Errors",
"by_field_name": "FunctionName",
"partition_field_name": "cloud.region"
},
{
"detector_description": "Anomalous throttle rate",
"function": "high_mean",
"field_name": "metrics.amazonaws.com/AWS/Lambda/Throttles",
"by_field_name": "FunctionName",
"partition_field_name": "cloud.region"
},
{
"detector_description": "Anomalous dead-letter delivery failures",
"function": "high_mean",
"field_name": "metrics.amazonaws.com/AWS/Lambda/DeadLetterErrors",
"by_field_name": "FunctionName",
"partition_field_name": "cloud.region"
}
],
"influencers": [
"FunctionName",
"cloud.region",
"cloud.account.id"
]
},
"analysis_limits": {
"model_memory_limit": "64mb"
},
"data_description": {
"time_field": "@timestamp",
"time_format": "epoch_ms"
},
"model_plot_config": {
"enabled": false,
"annotations_enabled": true
},
"custom_settings": {
"created_by": "ml-module-aws-lambda-otel-activity-ml"
}
}
}
],
"datafeeds": [
{
"id": "datafeed-aws_lambda_function_error_anomaly",
"job_id": "aws_lambda_function_error_anomaly",
"config": {
"job_id": "aws_lambda_function_error_anomaly",
"indices": [
"INDEX_PATTERN_NAME"
],
"indices_options": {
"allow_no_indices": true
},
"query": {
"bool": {
"filter": [
{
"term": {
"stat": "Sum"
}
},
{
"terms": {
"MetricName": [
"Errors",
"Throttles",
"DeadLetterErrors"
]
}
},
{
"exists": {
"field": "FunctionName"
}
}
]
}
},
"aggregations": {
"buckets": {
"composite": {
"size": 1000,
"sources": [
{
"date": {
"date_histogram": {
"field": "@timestamp",
"fixed_interval": "900s"
}
}
},
{
"cloud.region": {
"terms": {
"field": "cloud.region"
}
}
},
{
"FunctionName": {
"terms": {
"field": "FunctionName"
}
}
}
]
},
"aggregations": {
"@timestamp": {
"max": {
"field": "@timestamp"
}
},
"metrics.amazonaws.com/AWS/Lambda/Errors": {
"avg": {
"field": "metrics.amazonaws.com/AWS/Lambda/Errors"
}
},
"metrics.amazonaws.com/AWS/Lambda/Throttles": {
"avg": {
"field": "metrics.amazonaws.com/AWS/Lambda/Throttles"
}
},
"metrics.amazonaws.com/AWS/Lambda/DeadLetterErrors": {
"avg": {
"field": "metrics.amazonaws.com/AWS/Lambda/DeadLetterErrors"
}
}
}
}
}
}
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
{
"id": "aws_lambda_otel-metrics-ml",
"type": "ml-module",
"migrationVersion": {
"search": "7.9.3"
},
"references": [],
"attributes": {
"id": "aws_lambda_otel-metrics-ml",
"title": "AWS Lambda function performance (OpenTelemetry)",
"description": "Detect anomalies in Lambda function duration and concurrency streamed from CloudWatch via the OpenTelemetry firehose receiver. Each function is modelled against its own history, so latency drift and concurrency climbing toward the account limit are caught before they cross the static alert thresholds or begin to throttle.",
"type": "AWS metrics",
"logo": {
"icon": "logoAWSMono"
},
"defaultIndexPattern": "metrics-aws.lambda.otel-*",
"query": {
"bool": {
"filter": [
{
"term": {
"Namespace": "AWS/Lambda"
}
},
{
"exists": {
"field": "FunctionName"
}
}
],
"must_not": {
"terms": {
"_tier": [
"data_frozen",
"data_cold"
]
}
}
}
},
"jobs": [
{
"id": "aws_lambda_function_performance_anomaly",
"config": {
"groups": [
"aws",
"lambda",
"otel"
],
"description": "AWS Lambda: detect functions whose average duration or concurrency has drifted unusually relative to that function's own history - latency creeping up (a slow dependency, cold-start regression, or larger payloads) or concurrency climbing toward the account limit before throttling begins. The static alert rules cover hard duration/concurrency breaches; this job covers the sub-threshold drift, with the function, region, and account as influencers.",
"analysis_config": {
"bucket_span": "15m",
"summary_count_field_name": "doc_count",
"detectors": [
{
"detector_description": "Anomalous average duration (latency drift)",
"function": "high_mean",
"field_name": "metrics.amazonaws.com/AWS/Lambda/Duration",
"by_field_name": "FunctionName",
"partition_field_name": "cloud.region"
},
{
"detector_description": "Anomalous concurrent executions (saturation trajectory)",
"function": "high_mean",
"field_name": "metrics.amazonaws.com/AWS/Lambda/ConcurrentExecutions",
"by_field_name": "FunctionName",
"partition_field_name": "cloud.region"
}
],
"influencers": [
"FunctionName",
"cloud.region",
"cloud.account.id"
]
},
"analysis_limits": {
"model_memory_limit": "128mb"
},
"data_description": {
"time_field": "@timestamp",
"time_format": "epoch_ms"
},
"model_plot_config": {
"enabled": false,
"annotations_enabled": true
},
"custom_settings": {
"created_by": "ml-module-aws-lambda-otel-metrics-ml"
}
}
}
],
"datafeeds": [
{
"id": "datafeed-aws_lambda_function_performance_anomaly",
"job_id": "aws_lambda_function_performance_anomaly",
"config": {
"job_id": "aws_lambda_function_performance_anomaly",
"indices": [
"INDEX_PATTERN_NAME"
],
"indices_options": {
"allow_no_indices": true
},
"query": {
"bool": {
"filter": [
{
"term": {
"stat": "Average"
}
},
{
"terms": {
"MetricName": [
"Duration",
"ConcurrentExecutions"
]
}
},
{
"exists": {
"field": "FunctionName"
}
}
]
}
},
"aggregations": {
"buckets": {
"composite": {
"size": 1000,
"sources": [
{
"date": {
"date_histogram": {
"field": "@timestamp",
"fixed_interval": "900s"
}
}
},
{
"cloud.region": {
"terms": {
"field": "cloud.region"
}
}
},
{
"FunctionName": {
"terms": {
"field": "FunctionName"
}
}
}
]
},
"aggregations": {
"@timestamp": {
"max": {
"field": "@timestamp"
}
},
"metrics.amazonaws.com/AWS/Lambda/Duration": {
"avg": {
"field": "metrics.amazonaws.com/AWS/Lambda/Duration"
}
},
"metrics.amazonaws.com/AWS/Lambda/ConcurrentExecutions": {
"avg": {
"field": "metrics.amazonaws.com/AWS/Lambda/ConcurrentExecutions"
}
}
}
}
}
}
}
]
}
}
2 changes: 1 addition & 1 deletion packages/aws_lambda_otel/manifest.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
format_version: 3.6.0
name: aws_lambda_otel
title: "AWS Lambda Metrics OpenTelemetry Assets"
version: 0.7.0
version: 0.8.0
source:
license: "Elastic-2.0"
description: "AWS Lambda Metrics OpenTelemetry Assets"
Expand Down
Loading