Skip to content

Commit a4fbb2b

Browse files
committed
Added pushing of system logs to a CloudWatch log group
1 parent 11cc923 commit a4fbb2b

File tree

3 files changed

+197
-158
lines changed

3 files changed

+197
-158
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# GraphDB AWS Terraform Module Changelog
22

3+
## 3.1.0
4+
5+
* Added support for sending system logs to CloudWatch log groups
6+
37
## 3.0.2
48

59
* Updated GraphDB default version to [11.2.1](https://graphdb.ontotext.com/documentation/11.2/release-notes.html#graphdb-11-2-1)

modules/graphdb/templates/07_cloudwatch_setup.sh.tpl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ echo "#################################"
1515
echo "# Cloudwatch Provisioning #"
1616
echo "#################################"
1717

18+
usermod -aG adm cwagent
19+
1820
# Appends configuration overrides to graphdb.properties
1921
if [ ${deploy_monitoring} == "true" ]; then
2022
GRAPHDB_ADMIN_PASSWORD=$(aws --cli-connect-timeout 300 ssm get-parameter --region ${region} --name "/${name}/graphdb/admin_password" --with-decryption --query "Parameter.Value" --output text | base64 -d)
Lines changed: 191 additions & 158 deletions
Original file line numberDiff line numberDiff line change
@@ -1,167 +1,200 @@
11
{
2-
"agent": {
3-
"metrics_collection_interval": 60,
4-
"run_as_user": "cwagent"
5-
},
6-
"logs": {
7-
"logs_collected": {
8-
"files": {
9-
"collect_list": [
10-
{
11-
"file_path": "/var/opt/graphdb/node/logs/main.log",
12-
"log_group_name": "${name}",
13-
"log_stream_name": "{local_hostname}",
14-
"filters": [
15-
{
16-
"type": "exclude",
17-
"expression": "INFO"
18-
}
19-
]
20-
}
21-
]
22-
}
23-
},
24-
"metrics_collected": {
25-
"prometheus": {
26-
"log_group_name": "${name}",
27-
"prometheus_config_path": "/etc/prometheus/prometheus.yaml",
28-
"emf_processor": {
29-
"metric_declaration_dedup": true,
30-
"metric_namespace": "${name}",
31-
"metric_unit": {
32-
"graphdb_nonheap_used_mem": "Bytes",
33-
"graphdb_work_dir_used": "Bytes",
34-
"graphdb_threads_count": "Count",
35-
"graphdb_logs_dir_free": "Bytes",
36-
"graphdb_heap_committed_mem": "Bytes",
37-
"graphdb_logs_dir_used": "Bytes",
38-
"graphdb_heap_max_mem": "Bytes",
39-
"graphdb_heap_init_mem": "Bytes",
40-
"graphdb_nonheap_init_mem": "Bytes",
41-
"graphdb_class_count": "Count",
42-
"graphdb_mem_garbage_collections_count": "Count",
43-
"graphdb_data_dir_free": "Bytes",
44-
"graphdb_nonheap_max_mem": "Bytes",
45-
"graphdb_work_dir_free": "Bytes",
46-
"graphdb_cpu_load": "Percent",
47-
"graphdb_data_dir_used": "Bytes",
48-
"graphdb_nonheap_committed_mem": "Bytes",
49-
"graphdb_heap_used_mem": "Bytes",
50-
"graphdb_open_file_descriptors": "Count",
51-
"graphdb_nodes_in_cluster": "Count",
52-
"graphdb_nodes_in_sync": "Count",
53-
"graphdb_nodes_out_of_sync": "Count",
54-
"graphdb_nodes_disconnected": "Count",
55-
"graphdb_nodes_syncing": "Count",
56-
"graphdb_leader_elections_count": "Count",
57-
"graphdb_failure_recoveries_count": "Count"
58-
},
59-
"metric_declaration": [
60-
{
61-
"source_labels": [
62-
"job"
63-
],
64-
"label_matcher": "graphdb_infrastructure_monitor",
65-
"dimensions": [
66-
[
67-
"host"
68-
]
69-
],
70-
"metric_selectors": [
71-
"^graphdb_nonheap_used_mem$",
72-
"^graphdb_work_dir_used$",
73-
"^graphdb_threads_count$",
74-
"^graphdb_logs_dir_free$",
75-
"^graphdb_heap_committed_mem$",
76-
"^graphdb_logs_dir_used$",
77-
"^graphdb_heap_max_mem$",
78-
"^graphdb_heap_init_mem$",
79-
"^graphdb_nonheap_init_mem$",
80-
"^graphdb_class_count$",
81-
"^graphdb_mem_garbage_collections_count$",
82-
"^graphdb_data_dir_free$",
83-
"^graphdb_nonheap_max_mem$",
84-
"^graphdb_work_dir_free$",
85-
"^graphdb_cpu_load$",
86-
"^graphdb_data_dir_used$",
87-
"^graphdb_nonheap_committed_mem$",
88-
"^graphdb_heap_used_mem$",
89-
"^graphdb_open_file_descriptors$"
90-
]
91-
},
92-
{
93-
"source_labels": [
94-
"job"
95-
],
96-
"label_matcher": "graphdb_cluster_monitor",
97-
"dimensions": [
98-
[
99-
"host"
100-
]
101-
],
102-
"metric_selectors": [
103-
"^graphdb_nodes_in_cluster$",
104-
"^graphdb_nodes_in_sync$",
105-
"^graphdb_nodes_out_of_sync$",
106-
"^graphdb_nodes_disconnected$",
107-
"^graphdb_nodes_syncing$",
108-
"^graphdb_leader_elections_count$",
109-
"^graphdb_failure_recoveries_count$"
110-
]
111-
}
112-
]
113-
}
114-
}
115-
}
116-
},
117-
"metrics": {
118-
"aggregation_dimensions": [
119-
[
120-
"AutoScalingGroupName"
2+
"agent": {
3+
"metrics_collection_interval": 60,
4+
"run_as_user": "cwagent"
5+
},
6+
"logs": {
7+
"logs_collected": {
8+
"files": {
9+
"collect_list": [
10+
{
11+
"file_path": "/var/opt/graphdb/node/logs/main.log",
12+
"log_group_name": "${name}",
13+
"log_stream_name": "{local_hostname}",
14+
"filters": [
15+
{
16+
"type": "exclude",
17+
"expression": "INFO"
18+
}
12119
]
122-
],
123-
"append_dimensions": {
124-
"InstanceId": "$${aws:InstanceId}",
125-
"AutoScalingGroupName": "$${aws:AutoScalingGroupName}"
126-
},
127-
"metrics_collected": {
128-
"cpu": {
129-
"measurement": [
130-
"cpu_usage_user",
131-
"cpu_usage_system"
132-
],
133-
"metrics_collection_interval": 10,
134-
"totalcpu": false
135-
},
136-
"disk": {
137-
"measurement": [
138-
"used_percent",
139-
"disk_free",
140-
"disk_used_percent"
141-
],
142-
"metrics_collection_interval": 10,
143-
"resources": [
144-
"*"
20+
},
21+
{
22+
"file_path": "/var/log/dmesg",
23+
"log_group_name": "${name}",
24+
"log_stream_name": "{local_hostname}/dmesg",
25+
"filters": [
26+
{
27+
"type": "exclude",
28+
"expression": "INFO"
29+
}
30+
]
31+
},
32+
{
33+
"file_path": "/var/log/kern.log",
34+
"log_group_name": "${name}",
35+
"log_stream_name": "{local_hostname}/kern.log",
36+
"filters": [
37+
{
38+
"type": "exclude",
39+
"expression": "INFO"
40+
}
41+
]
42+
},
43+
{
44+
"file_path": "/var/log/syslog",
45+
"log_group_name": "${name}",
46+
"log_stream_name": "{local_hostname}/syslog",
47+
"filters": [
48+
{
49+
"type": "exclude",
50+
"expression": "INFO"
51+
}
52+
]
53+
}
54+
]
55+
}
56+
},
57+
"metrics_collected": {
58+
"prometheus": {
59+
"log_group_name": "${name}",
60+
"prometheus_config_path": "/etc/prometheus/prometheus.yaml",
61+
"emf_processor": {
62+
"metric_declaration_dedup": true,
63+
"metric_namespace": "${name}",
64+
"metric_unit": {
65+
"graphdb_nonheap_used_mem": "Bytes",
66+
"graphdb_work_dir_used": "Bytes",
67+
"graphdb_threads_count": "Count",
68+
"graphdb_logs_dir_free": "Bytes",
69+
"graphdb_heap_committed_mem": "Bytes",
70+
"graphdb_logs_dir_used": "Bytes",
71+
"graphdb_heap_max_mem": "Bytes",
72+
"graphdb_heap_init_mem": "Bytes",
73+
"graphdb_nonheap_init_mem": "Bytes",
74+
"graphdb_class_count": "Count",
75+
"graphdb_mem_garbage_collections_count": "Count",
76+
"graphdb_data_dir_free": "Bytes",
77+
"graphdb_nonheap_max_mem": "Bytes",
78+
"graphdb_work_dir_free": "Bytes",
79+
"graphdb_cpu_load": "Percent",
80+
"graphdb_data_dir_used": "Bytes",
81+
"graphdb_nonheap_committed_mem": "Bytes",
82+
"graphdb_heap_used_mem": "Bytes",
83+
"graphdb_open_file_descriptors": "Count",
84+
"graphdb_nodes_in_cluster": "Count",
85+
"graphdb_nodes_in_sync": "Count",
86+
"graphdb_nodes_out_of_sync": "Count",
87+
"graphdb_nodes_disconnected": "Count",
88+
"graphdb_nodes_syncing": "Count",
89+
"graphdb_leader_elections_count": "Count",
90+
"graphdb_failure_recoveries_count": "Count"
91+
},
92+
"metric_declaration": [
93+
{
94+
"source_labels": [
95+
"job"
96+
],
97+
"label_matcher": "graphdb_infrastructure_monitor",
98+
"dimensions": [
99+
[
100+
"host"
145101
]
102+
],
103+
"metric_selectors": [
104+
"^graphdb_nonheap_used_mem$",
105+
"^graphdb_work_dir_used$",
106+
"^graphdb_threads_count$",
107+
"^graphdb_logs_dir_free$",
108+
"^graphdb_heap_committed_mem$",
109+
"^graphdb_logs_dir_used$",
110+
"^graphdb_heap_max_mem$",
111+
"^graphdb_heap_init_mem$",
112+
"^graphdb_nonheap_init_mem$",
113+
"^graphdb_class_count$",
114+
"^graphdb_mem_garbage_collections_count$",
115+
"^graphdb_data_dir_free$",
116+
"^graphdb_nonheap_max_mem$",
117+
"^graphdb_work_dir_free$",
118+
"^graphdb_cpu_load$",
119+
"^graphdb_data_dir_used$",
120+
"^graphdb_nonheap_committed_mem$",
121+
"^graphdb_heap_used_mem$",
122+
"^graphdb_open_file_descriptors$"
123+
]
146124
},
147-
"diskio": {
148-
"measurement": [
149-
"io_time"
150-
],
151-
"metrics_collection_interval": 10,
152-
"resources": [
153-
"*"
125+
{
126+
"source_labels": [
127+
"job"
128+
],
129+
"label_matcher": "graphdb_cluster_monitor",
130+
"dimensions": [
131+
[
132+
"host"
154133
]
155-
},
156-
"mem": {
157-
"measurement": [
158-
"mem_used_percent",
159-
"mem_free",
160-
"mem_available_percent",
161-
"mem_total"
162-
],
163-
"metrics_collection_interval": 10
134+
],
135+
"metric_selectors": [
136+
"^graphdb_nodes_in_cluster$",
137+
"^graphdb_nodes_in_sync$",
138+
"^graphdb_nodes_out_of_sync$",
139+
"^graphdb_nodes_disconnected$",
140+
"^graphdb_nodes_syncing$",
141+
"^graphdb_leader_elections_count$",
142+
"^graphdb_failure_recoveries_count$"
143+
]
164144
}
145+
]
165146
}
147+
}
148+
}
149+
},
150+
"metrics": {
151+
"aggregation_dimensions": [
152+
[
153+
"AutoScalingGroupName"
154+
]
155+
],
156+
"append_dimensions": {
157+
"InstanceId": "$${aws:InstanceId}",
158+
"AutoScalingGroupName": "$${aws:AutoScalingGroupName}"
159+
},
160+
"metrics_collected": {
161+
"cpu": {
162+
"measurement": [
163+
"cpu_usage_user",
164+
"cpu_usage_system"
165+
],
166+
"metrics_collection_interval": 10,
167+
"totalcpu": false
168+
},
169+
"disk": {
170+
"measurement": [
171+
"used_percent",
172+
"disk_free",
173+
"disk_used_percent"
174+
],
175+
"metrics_collection_interval": 10,
176+
"resources": [
177+
"*"
178+
]
179+
},
180+
"diskio": {
181+
"measurement": [
182+
"io_time"
183+
],
184+
"metrics_collection_interval": 10,
185+
"resources": [
186+
"*"
187+
]
188+
},
189+
"mem": {
190+
"measurement": [
191+
"mem_used_percent",
192+
"mem_free",
193+
"mem_available_percent",
194+
"mem_total"
195+
],
196+
"metrics_collection_interval": 10
197+
}
166198
}
199+
}
167200
}

0 commit comments

Comments
 (0)