cortexlabs · RobertLucian · Jul 3, 2021 · Jun 28, 2021 · Jun 28, 2021 · Jun 28, 2021
diff --git a/cli/cmd/cluster.go b/cli/cmd/cluster.go
@@ -770,11 +770,12 @@ func cmdInfo(awsClient *aws.Client, accessConfig *clusterconfig.AccessConfig, st
 infoInterface = infoResponse.ClusterConfig.Config
 } else {
 infoInterface = map[string]interface{}{
-"cluster_config": infoResponse.ClusterConfig.Config,
-"cluster_metadata": infoResponse.ClusterConfig.OperatorMetadata,
-"node_infos": infoResponse.NodeInfos,
-"endpoint_operator": operatorEndpoint,
-"endpoint_api": apiEndpoint,
+"cluster_config": infoResponse.ClusterConfig.Config,
+"cluster_metadata": infoResponse.ClusterConfig.OperatorMetadata,
+"worker_node_infos": infoResponse.WorkerNodeInfos,
+"operator_node_infos": infoResponse.OperatorNodeInfos,
+"endpoint_operator": operatorEndpoint,
+"endpoint_api": apiEndpoint,
 }
 }
 
@@ -848,6 +849,8 @@ func printInfoPricing(infoResponse *schema.InfoResponse, clusterConfig clusterco
 eksPrice := aws.EKSPrices[clusterConfig.Region]
 operatorInstancePrice := aws.InstanceMetadatas[clusterConfig.Region]["t3.medium"].Price
 operatorEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
+prometheusInstancePrice := aws.InstanceMetadatas[clusterConfig.Region][clusterConfig.PrometheusInstanceType].Price
+prometheusEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
 metricsEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp2"].PriceGB * (40 + 2) / 30 / 24
 nlbPrice := aws.NLBMetadatas[clusterConfig.Region].Price
 natUnitPrice := aws.NATMetadatas[clusterConfig.Region].Price
@@ -891,17 +894,20 @@ func printInfoPricing(infoResponse *schema.InfoResponse, clusterConfig clusterco
 totalNodeGroupsPrice += totalEBSPrice + totalInstancePrice
 }
 
+operatorNodeGroupPrice := float64(len(infoResponse.OperatorNodeInfos)) * (operatorInstancePrice + operatorEBSPrice)
+prometheusNodeGroupPrice := prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice
+
 var natTotalPrice float64
 if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {
 natTotalPrice = natUnitPrice
 } else if clusterConfig.NATGateway == clusterconfig.HighlyAvailableNATGateway {
 natTotalPrice = natUnitPrice * float64(len(clusterConfig.AvailabilityZones))
 }
-totalPrice := eksPrice + totalNodeGroupsPrice + 2*(operatorInstancePrice+operatorEBSPrice) + metricsEBSPrice + nlbPrice*2 + natTotalPrice
+totalPrice := eksPrice + totalNodeGroupsPrice + operatorNodeGroupPrice + prometheusNodeGroupPrice + nlbPrice*2 + natTotalPrice
 fmt.Printf(console.Bold("\nyour cluster currently costs %s per hour\n\n"), s.DollarsAndCents(totalPrice))
 
-operatorPrice := 2*(operatorInstancePrice+operatorEBSPrice) + metricsEBSPrice
-rows = append(rows, []interface{}{"2 t3.medium instances (cortex system)", s.DollarsAndTenthsOfCents(operatorPrice)})
+rows = append(rows, []interface{}{fmt.Sprintf("%d t3.medium %s (cortex system)", len(infoResponse.OperatorNodeInfos), s.PluralS("instance", len(infoResponse.OperatorNodeInfos))), s.DollarsAndTenthsOfCents(operatorNodeGroupPrice)})
+rows = append(rows, []interface{}{fmt.Sprintf("1 %s instance (cortex system)", clusterConfig.PrometheusInstanceType), s.DollarsAndTenthsOfCents(prometheusNodeGroupPrice)})
 rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(nlbPrice*2) + " total"})
 
 if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {
@@ -919,11 +925,11 @@ func printInfoPricing(infoResponse *schema.InfoResponse, clusterConfig clusterco
 }
 
 func printInfoNodes(infoResponse *schema.InfoResponse) {
-numAPIInstances := len(infoResponse.NodeInfos)
+numAPIInstances := len(infoResponse.WorkerNodeInfos)
 
 var totalReplicas int
 var doesClusterHaveGPUs, doesClusterHaveInfs, doesClusterHaveAsyncGateways, doesClusterHaveEnqueuers bool
-for _, nodeInfo := range infoResponse.NodeInfos {
+for _, nodeInfo := range infoResponse.WorkerNodeInfos {
 totalReplicas += nodeInfo.NumReplicas
 if nodeInfo.ComputeUserCapacity.GPU > 0 {
 doesClusterHaveGPUs = true
@@ -946,7 +952,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 
 fmt.Printf(console.Bold("\nyour cluster has %d API %s running across %d %s%s\n"), totalReplicas, s.PluralS("replica", totalReplicas), numAPIInstances, s.PluralS("instance", numAPIInstances), pendingReplicasStr)
 
-if len(infoResponse.NodeInfos) == 0 {
+if len(infoResponse.WorkerNodeInfos) == 0 {
 return
 }
 
@@ -963,7 +969,7 @@ func printInfoNodes(infoResponse *schema.InfoResponse) {
 }
 
 var rows [][]interface{}
-for _, nodeInfo := range infoResponse.NodeInfos {
+for _, nodeInfo := range infoResponse.WorkerNodeInfos {
 lifecycle := "on-demand"
 if nodeInfo.IsSpot {
 lifecycle = "spot"

diff --git a/cli/cmd/lib_cluster_config.go b/cli/cmd/lib_cluster_config.go
@@ -163,7 +163,9 @@ func getConfigureClusterConfig(awsClient *aws.Client, stacks clusterstate.Cluste
 func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient *aws.Client, disallowPrompt bool) {
 eksPrice := aws.EKSPrices[clusterConfig.Region]
 operatorInstancePrice := aws.InstanceMetadatas[clusterConfig.Region]["t3.medium"].Price
+prometheusInstancePrice := aws.InstanceMetadatas[clusterConfig.Region][clusterConfig.PrometheusInstanceType].Price
 operatorEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
+prometheusEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp3"].PriceGB * 20 / 30 / 24
 metricsEBSPrice := aws.EBSMetadatas[clusterConfig.Region]["gp2"].PriceGB * (40 + 2) / 30 / 24
 nlbPrice := aws.NLBMetadatas[clusterConfig.Region].Price
 natUnitPrice := aws.NATMetadatas[clusterConfig.Region].Price
@@ -184,9 +186,10 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
 rows = append(rows, []interface{}{"1 eks cluster", s.DollarsMaxPrecision(eksPrice)})
 
 ngNameToSpotInstancesUsed := map[string]int{}
-fixedPrice := eksPrice + 2*(operatorInstancePrice+operatorEBSPrice) + metricsEBSPrice + 2*nlbPrice + natTotalPrice
-totalMinPrice := fixedPrice
-totalMaxPrice := fixedPrice
+baseMinPrice := eksPrice + operatorInstancePrice + operatorEBSPrice + prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice + 2*nlbPrice + natTotalPrice
+baseMaxPrice := eksPrice + 25*(operatorInstancePrice+operatorEBSPrice) + prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice + 2*nlbPrice + natTotalPrice
+totalMinPrice := baseMinPrice
+totalMaxPrice := baseMaxPrice
 for _, ng := range clusterConfig.NodeGroups {
 apiInstancePrice := aws.InstanceMetadatas[clusterConfig.Region][ng.InstanceType].Price
 apiEBSPrice := aws.EBSMetadatas[clusterConfig.Region][ng.InstanceVolumeType.String()].PriceGB * float64(ng.InstanceVolumeSize) / 30 / 24
@@ -223,8 +226,11 @@ func confirmInstallClusterConfig(clusterConfig *clusterconfig.Config, awsClient
 rows = append(rows, []interface{}{workerInstanceStr, workerPriceStr})
 }
 
-operatorPrice := 2*(operatorInstancePrice+operatorEBSPrice) + metricsEBSPrice
-rows = append(rows, []interface{}{"2 t3.medium instances (cortex system)", s.DollarsAndTenthsOfCents(operatorPrice)})
+minOperatorNodeGroupPrice := operatorInstancePrice + operatorEBSPrice
+maxOperatorNodeGroupPrice := 25 * (operatorInstancePrice + operatorEBSPrice)
+prometheusNodeGroupPrice := prometheusInstancePrice + prometheusEBSPrice + metricsEBSPrice
+rows = append(rows, []interface{}{"1-25 t3.medium instances (cortex system)", fmt.Sprintf("%s - %s (depending on load)", s.DollarsAndTenthsOfCents(minOperatorNodeGroupPrice), s.DollarsAndTenthsOfCents(maxOperatorNodeGroupPrice))})
+rows = append(rows, []interface{}{fmt.Sprintf("1 %s instance (cortex system)", clusterConfig.PrometheusInstanceType), s.DollarsAndTenthsOfCents(prometheusNodeGroupPrice)})
 rows = append(rows, []interface{}{"2 network load balancers", s.DollarsMaxPrecision(nlbPrice) + " each"})
 
 if clusterConfig.NATGateway == clusterconfig.SingleNATGateway {

diff --git a/dev/prometheus.md b/dev/prometheus.md
@@ -0,0 +1,211 @@
+# Metrics
+
+## Updating metrics
+
+When new metrics, labels, or exporters are added for Prometheus to scrape, make sure the following list **is updated** as well, so that it remains an accurate record of which metrics/labels are needed.
+
+The following is a list of metrics that are currently in use.
+
+#### Cortex metrics
+
+1. cortex_in_flight_requests with the following labels:
+ 1. api_name
+1. cortex_async_request_count with the following labels:
+ 1. api_name
+ 1. api_kind
+ 1. status_code
+1. cortex_async_queue_length with the following labels:
+ 1. api_name
+ 1. api_kind
+1. cortex_async_latency_bucket with the following labels:
+ 1. api_name
+ 1. api_kind
+1. cortex_batch_succeeded with the following labels:
+ 1. api_name
+1. cortex_batch_failed with the following labels:
+ 1. api_name
+1. cortex_time_per_batch_sum with the following labels:
+ 1. api_name
+1. cortex_time_per_batch_count with the following labels:
+ 1. api_name
+
+#### Istio metrics
+
+1. istio_requests_total with the following labels:
+ 1. destination_service
+ 1. response_code
+1. istio_request_duration_milliseconds_bucket with the following labels:
+ 1. destination_service
+ 1. le
+1. istio_request_duration_milliseconds_sum with the following labels:
+ 1. destination_service
+1. istio_request_duration_milliseconds_count with the following labels:
+ 1. destination_service
+
+#### Kubelet metrics
+1. container_cpu_usage_seconds_total with the following labels:
+ 1. pod
+ 1. container
+ 1. name
+1. container_memory_working_set_bytes with the following labels:
+ 1. pod
+ 1. name
+ 1. container
+
+#### Kube-state-metrics metrics
+
+1. kube_pod_container_resource_requests with the following labels:
+ 1. exported_pod
+ 1. resource
+ 1. exported_container (required so that the per-container values of each pod are not dropped)
+1. kube_pod_info with the following labels:
+ 1. exported_pod
+1. kube_deployment_status_replicas_available with the following labels:
+ 1. deployment
+1. kube_job_status_active with the following labels:
+ 1. job_name
+
+#### DCGM metrics
+
+1. DCGM_FI_DEV_GPU_UTIL with the following labels:
+ 1. exported_pod
+1. DCGM_FI_DEV_FB_USED with the following labels:
+ 1. exported_pod
+1. DCGM_FI_DEV_FB_FREE with the following labels:
+ 1. exported_pod
+
+#### Node metrics
+
+1. node_cpu_seconds_total with the following labels:
+ 1. job
+ 1. mode
+ 1. instance
+ 1. cpu
+1. node_load1 with the following labels:
+ 1. job
+ 1. instance
+1. node_load5 with the following labels:
+ 1. job
+ 1. instance
+1. node_load15 with the following labels:
+ 1. job
+ 1. instance
+1. node_exporter_build_info with the following labels:
+ 1. job
+ 1. instance
+1. node_memory_MemTotal_bytes with the following labels:
+ 1. job
+ 1. instance
+1. node_memory_MemFree_bytes with the following labels:
+ 1. job
+ 1. instance
+1. node_memory_Buffers_bytes with the following labels:
+ 1. job
+ 1. instance
+1. node_memory_Cached_bytes with the following labels:
+ 1. job
+ 1. instance
+1. node_memory_MemAvailable_bytes with the following labels:
+ 1. job
+ 1. instance
+1. node_disk_read_bytes_total with the following labels:
+ 1. job
+ 1. instance
+ 1. device
+1. node_disk_written_bytes_total with the following labels:
+ 1. job
+ 1. instance
+ 1. device
+1. node_disk_io_time_seconds_total with the following labels:
+ 1. job
+ 1. instance
+ 1. device
+1. node_filesystem_size_bytes with the following labels:
+ 1. job
+ 1. instance
+ 1. fstype
+ 1. mountpoint
+ 1. device
+1. node_filesystem_avail_bytes with the following labels:
+ 1. job
+ 1. instance
+ 1. fstype
+ 1. device
+1. node_network_receive_bytes_total with the following labels:
+ 1. job
+ 1. instance
+ 1. device
+1. node_network_transmit_bytes_total with the following labels:
+ 1. job
+ 1. instance
+ 1. device
+
+##### Prometheus rules for the node exporter
+
+1. instance:node_cpu_utilisation:rate1m from the following metrics:
+ 1. node_cpu_seconds_total with the following labels:
+ 1. job
+ 1. mode
+1. instance:node_num_cpu:sum from the following metrics:
+ 1. node_cpu_seconds_total with the following labels:
+ 1. job
+1. instance:node_load1_per_cpu:ratio from the following metrics:
+ 1. node_load1 with the following labels:
+ 1. job
+1. instance:node_memory_utilisation:ratio from the following metrics:
+ 1. node_memory_MemTotal_bytes with the following labels:
+ 1. job
+ 1. node_memory_MemAvailable_bytes with the following labels:
+ 1. job
+1. instance:node_vmstat_pgmajfault:rate1m from the following metrics:
+ 1. node_vmstat_pgmajfault with the following labels:
+ 1. job
+1. instance_device:node_disk_io_time_seconds:rate1m from the following metrics:
+ 1. node_disk_io_time_seconds_total with the following labels:
+ 1. job
+ 1. device
+1. instance_device:node_disk_io_time_weighted_seconds:rate1m from the following metrics:
+ 1. node_disk_io_time_weighted_seconds_total with the following labels:
+ 1. job
+ 1. device
+1. instance:node_network_receive_bytes_excluding_lo:rate1m from the following metrics:
+ 1. node_network_receive_bytes_total with the following labels:
+ 1. job
+ 1. device
+1. instance:node_network_transmit_bytes_excluding_lo:rate1m from the following metrics:
+ 1. node_network_transmit_bytes_total with the following labels:
+ 1. job
+ 1. device
+1. instance:node_network_receive_drop_excluding_lo:rate1m from the following metrics:
+ 1. node_network_receive_drop_total with the following labels:
+ 1. job
+ 1. device
+1. instance:node_network_transmit_drop_excluding_lo:rate1m from the following metrics:
+ 1. node_network_transmit_drop_total with the following labels:
+ 1. job
+ 1. device
+
+## Re-introducing dropped metrics/labels
+
+If you need to add some metrics/labels back for a particular use case, comment out every `metricRelabelings:` section (except the one in the `prometheus-operator.yaml` file), determine which metrics/labels you want to add back (e.g. by using the explorer in Grafana), and then re-edit the appropriate `metricRelabelings:` sections to account for the un-dropped metrics/labels.
+
+## Prometheus Analysis
+
+### Go Pprof
+
+To analyse the memory allocations of prometheus, run `kubectl port-forward prometheus-prometheus-0 9090:9090`, and then run `go tool pprof -symbolize=remote -inuse_space localhost:9090/debug/pprof/heap`. Once you are in the interactive pprof shell, you can run `top` or `dot` for a more detailed hierarchy of the memory usage.
+
+### TSDB
+
+To analyse the TSDB of prometheus, exec into the `prometheus-prometheus-0` pod, `cd` into `/tmp`, and run the following code-block:
+
+```bash
+wget https://github.com/prometheus/prometheus/releases/download/v1.7.3/prometheus-1.7.3.linux-amd64.tar.gz
+tar -xzf prometheus-*
+cd prometheus-*
+./tsdb analyze /prometheus | less
+```
+
+*Useful link: https://www.robustperception.io/using-tsdb-analyze-to-investigate-churn-and-cardinality*
+
+Alternatively, you can go to `localhost:9090` -> `Status` -> `TSDB Status`, but it is not as complete as the analysis produced by running the `tsdb` binary.
diff --git a/docs/clusters/management/create.md b/docs/clusters/management/create.md
@@ -86,11 +86,15 @@ tags: # <string>: <string> map of key/value pairs
 # SSL certificate ARN (only necessary when using a custom domain)
 ssl_certificate_arn:
 
-# List of IAM policies to attach to your Cortex APIs
+# list of IAM policies to attach to your Cortex APIs
 iam_policy_arns: ["arn:aws:iam::aws:policy/AmazonS3FullAccess"]
 
 # primary CIDR block for the cluster's VPC
 vpc_cidr: 192.168.0.0/16
+
+# instance type for prometheus
+# use a bigger instance if the cluster is expected to grow past one thousand nodes (or API replicas)
+prometheus_instance_type: "t3.medium"
 ```
 
 The docker images used by the cluster can also be overridden. They can be configured by adding any of these keys to your cluster configuration file (default values are shown):