-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
134 lines (119 loc) · 6.65 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
// +build go1.17
package main
import (
"fmt"
"io"
"log"
"os"
"strings"
"time"
"github.com/alexflint/go-arg"
analyze "github.com/kedark3/cpa/cmd/analyze"
notify "github.com/kedark3/cpa/cmd/notify"
prometheus "github.com/kedark3/cpa/cmd/prometheus"
exutil "github.com/openshift/openshift-tests/test/extended/util"
g "github.com/onsi/ginkgo"
o "github.com/onsi/gomega"
)
// main is the entry point of the Continuous Performance Analysis (CPA) tool.
//
// It performs the following steps in order:
//  1. Parses the command-line flags into args.
//  2. Registers the ginkgo fail handler with gomega.
//  3. When --log-output is set, mirrors log output to a timestamped log file
//     in addition to stdout.
//  4. Locates Prometheus (URL and bearer token) and reads the Slack config
//     and the Prometheus query list; any failure here is logged and main
//     returns.
//  5. Starts the background goroutines: the endless query loop, the Slack
//     notifier and, when --terminate-benchmark is set, the benchmark
//     terminator.
//  6. Sleeps for --timeout and then returns, which ends the process and with
//     it the background goroutines.
func main() {
	var args struct {
		NoClrscr           bool          `arg:"--noclrscr" help:"Do not clear screen after each iteration. Clears screen by default." default:"false"`
		Queries            string        `arg:"-q,--queries" help:"queries file to use" default:"queries.yaml"`
		QueryFrequency     time.Duration `arg:"-f,--query-frequency" help:"How often do we run queries. You can pass values like 4h or 1h10m10s" default:"20s"`
		Timeout            time.Duration `arg:"-t,--timeout" help:"Duration to run Continuous Performance Analysis. You can pass values like 4h or 1h10m10s" default:"4h"`
		LogOutput          bool          `arg:"-l,--log-output" help:"Output will be stored in a log file(cpa.log) in addition to stdout." default:"false"`
		TerminateBenchmark string        `arg:"-k,--terminate-benchmark" help:"When CPA is running in parallel with benchmark job, let CPA know to kill benchmark if any query fail. (E.g. -k <processID>) Helpful to preserve cluster for further analysis." default:""`
		Verbose            bool          `arg:"-v,--verbose" help:"When this mode is enabled, output will contain much more information about each query."`
	}
	arg.MustParse(&args)

	o.RegisterFailHandler(g.Fail)

	if args.LogOutput {
		f, err := os.OpenFile("cpa_"+time.Now().Format("2006-01-02_15:04:05")+".log", os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0644)
		// Check the error before handing f to anything else, so an invalid
		// file handle is never wrapped in a writer.
		if err != nil {
			log.Fatal(err)
		}
		// Close the log file when main returns.
		defer f.Close()
		// Send log output to both stdout and the log file.
		log.SetOutput(io.MultiWriter(os.Stdout, f))
	}

	oc := exutil.NewCLI("prometheus-cpa", exutil.KubeConfigPath())
	// secrets, err := oc.AdminKubeClient().CoreV1().Secrets("openshift-monitoring").List(metav1.ListOptions{})
	// if err != nil {
	// 	log.Printf("An Error has occurred %s", err)
	// 	return
	// }
	// log.Printf("Found following secrets %d", secrets.Size())

	url, bearerToken, err := prometheus.LocatePrometheus(oc)
	if err != nil {
		log.Printf("Oops something went wrong while trying to fetch Prometheus url and bearerToken")
		log.Println(err)
		return
	}

	slackConfig, err := notify.ReadslackConfig()
	if err != nil {
		log.Printf("Oops something went wrong while trying to fetch Slack Config")
		log.Println(err)
		return
	}
	// fmt.Println("UserID, Channel ID, slackToken: ", slackConfig)

	// Legacy hard-coded query list, kept for reference; queries are now read
	// from the file given by --queries.
	// queries := []string{
	// 	`sum(kube_pod_status_phase{}) by (phase) > 0`, // pod count by phase
	// 	`sum(kube_namespace_status_phase) by (phase)`, // namespace count by phase
	// 	`sum(kube_node_status_condition{status="true"}) by (condition) > 0`, // node condition by status
	// 	`sum by (instance) (rate(ovnkube_master_pod_creation_latency_seconds_sum[2m]))`, // OVN pod creation latency
	// 	`sum by (instance) (rate(ovnkube_node_cni_request_duration_seconds_sum{command="ADD"}[2m]))`, // CNI Request duration for "ADD" command over 2m interval
	// 	`sum by (instance) (rate(ovnkube_node_cni_request_duration_seconds_sum{command="DEL"}[2m]))`, // CNI Request duration for "DEL" command over 2m interval
	// 	`sum(container_memory_working_set_bytes{pod=~"ovnkube-master-.*",namespace="openshift-ovn-kubernetes",container=""}) by (pod, node)`, // ovnkube-master Memory Usage
	// 	`sum(container_memory_working_set_bytes{pod=~"ovnkube-master-.*",namespace="openshift-ovn-kubernetes",container!=""}) by (pod, node)`, // ovnkube-master Memory Usage
	// 	`topk(10, rate(container_cpu_usage_seconds_total{pod=~"ovnkube-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}[2m])*100)`, // top 10 - ovn-controller cpu usage
	// 	`topk(10, sum(container_memory_working_set_bytes{pod=~"ovnkube-node-.*",namespace="openshift-ovn-kubernetes",container="ovn-controller"}) by (node))`, // top 10 - ovn-controller memory usage
	// 	`sum(container_memory_rss{pod="prometheus-k8s-0",namespace!="",name!="",container="prometheus"}) by (pod)`, // Prometheus replica 0 rss memory
	// 	`sum(container_memory_rss{pod="prometheus-k8s-1",namespace!="",name!="",container="prometheus"}) by (pod)`, // Prometheus replica 1 rss memory
	// 	`rate(container_cpu_usage_seconds_total{pod=~"ovnkube-master.*",namespace="openshift-ovn-kubernetes",container!=""}[2m])*100`, // CPU usage ovnkube-master components over 2m interval
	// 	`sum by (condition)(cluster_operator_conditions{condition!=""})`,
	// }
	// log.Printf("URL is %s and bearerToken is %s", url, bearerToken)
	// for _, query := range queries {
	// 	fmt.Println(prometheus.RunQuery(query, oc, url, bearerToken))
	// 	fmt.Println()
	// }

	// tb is handed to analyze.Queries and notify.TerminateBenchmark; c is
	// handed to analyze.Queries and slackConfig.Notify.
	// NOTE(review): presumably tb signals "terminate the benchmark" and c
	// carries notification messages — confirm against those packages.
	tb := make(chan bool)
	c := make(chan string)

	queryList, err := analyze.ReadPrometheusQueries(args.Queries)
	if err != nil {
		log.Println(err)
		return
	}
	log.Println("Read following queries:")
	for _, item := range queryList {
		log.Println(item.Query)
	}

	// threadTS is the value returned by the opening Slack notification and is
	// passed back on later notifications. It stays empty when Slack is not
	// configured.
	var threadTS string
	if slackConfig.ChannelID != "" && slackConfig.UserID != "" && slackConfig.SlackToken != "" {
		threadTS = slackConfig.SlackNotify("New benchmark started, we will monitor it for performance and notify here with the issues.", "")
		// Announce completion when main returns after the timeout.
		defer slackConfig.SlackNotify(fmt.Sprintf("Continuous Perf Analysis has ended all iterations. Total time spent: %s", args.Timeout.String()), threadTS)
	} else {
		log.Printf("No slack notifications will be sent as Slack Config is not properly setup. One of the fields may be empty. Check config/slack.yaml")
	}

	// Query loop: runs every query, waits QueryFrequency, optionally clears
	// the screen, and repeats. It never returns on its own; it ends when the
	// process exits after the timeout below.
	go func() {
		separator := strings.Repeat("~", 80)
		for i := 1; ; i++ {
			log.Printf("\n%[2]s\nIteration no. %[1]d\n%[2]s\n", i, separator)
			analyze.Queries(queryList, oc, url, bearerToken, c, tb, args.TerminateBenchmark, args.Verbose)
			time.Sleep(args.QueryFrequency)
			if !args.NoClrscr {
				log.Print("\033[H\033[2J") // clears screen before printing next iteration
			}
		}
	}()

	go slackConfig.Notify(c, threadTS)

	if args.TerminateBenchmark != "" {
		go notify.TerminateBenchmark(tb, args.TerminateBenchmark)
	}

	// args.Timeout is already a time.Duration, so sleep on it directly rather
	// than round-tripping it through a string.
	time.Sleep(args.Timeout)
}