From 3450269f2d2e1dfaba8c5a016c5fb663d383f795 Mon Sep 17 00:00:00 2001
From: Taras Madan
Date: Fri, 29 Nov 2024 16:04:24 +0100
Subject: [PATCH] dashboard/app: periodically remove coverage garbage from spanner

---
 dashboard/app/batch_coverage.go  | 70 ++++++++++++++++++++++++++++++++
 dashboard/app/batch_main.go      |  1 +
 dashboard/app/cron.yaml          |  8 +++-
 tools/syz-covermerger/init_db.sh |  6 +++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/dashboard/app/batch_coverage.go b/dashboard/app/batch_coverage.go
index d5dc3b83f3ed..584d33505f56 100644
--- a/dashboard/app/batch_coverage.go
+++ b/dashboard/app/batch_coverage.go
@@ -7,11 +7,13 @@ import (
 	"context"
 	"fmt"
 	"net/http"
+	"os"
 	"strconv"
 
 	"cloud.google.com/go/batch/apiv1/batchpb"
 	"cloud.google.com/go/bigquery"
 	"cloud.google.com/go/civil"
+	"cloud.google.com/go/spanner"
 	"github.com/google/syzkaller/pkg/coveragedb"
 	"google.golang.org/api/iterator"
 	"google.golang.org/appengine/v2"
@@ -151,3 +153,71 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
 	}
 	return periods, recordsCount, nil
 }
+
+// Abandoned DB cleanup is expensive.
+// Every namespace creates at least 3 sessions every day (yesterday, today, this month),
+// which means every day we deprecate at least 3 session records.
+// Every session is ~10k file records, plus some mutations in the index.
+// One transaction can delete up to 80k rows.
+// To clean everything up once a week we have to garbage-collect at least 7 * 3 * count(namespaces) sessions.
+func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
+	paramSessions := r.FormValue("sessions")
+	if paramSessions == "" {
+		paramSessions = "3" // one day, one namespace means 3 records
+	}
+	maxSessionsToDel, err := strconv.Atoi(paramSessions)
+	if err != nil {
+		w.WriteHeader(http.StatusBadRequest)
+		w.Write([]byte("failed to parse 'sessions', integer expected"))
+		return
+	}
+	ctx := context.Background()
+	for i := 0; i < maxSessionsToDel; i++ {
+		deletedRows, err := deleteGarbageSession(ctx)
+		if err != nil {
+			errMsg := fmt.Sprintf("failed to deleteGarbageSession: %s", err.Error())
+			log.Errorf(ctx, "%s", errMsg)
+			w.Write([]byte(errMsg))
+			return
+		}
+		w.Write([]byte(fmt.Sprintf("deleteGarbageSession -> -%d rows\n", deletedRows)))
+	}
+}
+
+// deleteGarbageSession generates approximately 20k mutations (10k in files and 10k in the index).
+// The Spanner limit is 80k mutations per transaction,
+// so we could delete up to 4 sessions' data at once.
+// Let's keep it simple and delete only 1 session at a time.
+//
+// deleteGarbageSession returns the number of deleted rows.
+// (0, nil) means there is no data to delete.
+func deleteGarbageSession(ctx context.Context) (int64, error) {
+	client, err := coveragedb.NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
+	if err != nil {
+		return 0, fmt.Errorf("coveragedb.NewClient: %w", err)
+	}
+	defer client.Close()
+	var rowCount int64
+
+	_, err = client.ReadWriteTransaction(ctx, func(ctx context.Context, txn *spanner.ReadWriteTransaction) error {
+		stmt := spanner.Statement{
+			SQL: `delete from files
+				where files.session in (
+					select
+						distinct(files.session)
+					from files
+					left join merge_history
+						on files.session = merge_history.session
+					where merge_history.session is NULL
+					limit 1
+				)`,
+		}
+		var err error
+		rowCount, err = txn.Update(ctx, stmt)
+		if err != nil {
+			return fmt.Errorf("txn.Update: %w", err)
+		}
+		return nil
+	})
+	return rowCount, err
+}
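The comments above describe the cleanup strategy: sessions present in files but absent from merge_history are garbage, and Spanner's per-transaction mutation limit caps how much can be deleted at once. Below is a minimal read-only sketch of the same anti-join, useful for estimating how much garbage is pending before the cron job runs; countGarbageSessions, main, and the database path are illustrative assumptions, not part of this patch.

package main

import (
	"context"
	"fmt"
	"log"

	"cloud.google.com/go/spanner"
)

// countGarbageSessions is a hypothetical helper, not part of this patch.
// It counts the distinct sessions that deleteGarbageSession would remove:
// sessions present in files but absent from merge_history.
func countGarbageSessions(ctx context.Context, database string) (int64, error) {
	// database is the full path, e.g. "projects/<project>/instances/syzbot/databases/<db>".
	client, err := spanner.NewClient(ctx, database)
	if err != nil {
		return 0, err
	}
	defer client.Close()
	stmt := spanner.Statement{SQL: `
		select count(distinct files.session)
		from files
		left join merge_history
			on files.session = merge_history.session
		where merge_history.session is NULL`}
	iter := client.Single().Query(ctx, stmt)
	defer iter.Stop()
	row, err := iter.Next()
	if err != nil {
		return 0, err
	}
	var n int64
	if err := row.Columns(&n); err != nil {
		return 0, err
	}
	return n, nil
}

func main() {
	ctx := context.Background()
	// The project, instance, and database names below are assumptions for illustration.
	n, err := countGarbageSessions(ctx, "projects/syzkaller/instances/syzbot/databases/coverage")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("garbage sessions pending: %d\n", n)
}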
diff --git a/dashboard/app/batch_main.go b/dashboard/app/batch_main.go
index e1b514a26346..596f50e502f0 100644
--- a/dashboard/app/batch_main.go
+++ b/dashboard/app/batch_main.go
@@ -18,6 +18,7 @@ import (
 func initBatchProcessors() {
 	http.HandleFunc("/cron/batch_coverage", handleBatchCoverage)
 	http.HandleFunc("/cron/batch_db_export", handleBatchDBExport)
+	http.HandleFunc("/cron/batch_coverage_clean", handleBatchCoverageClean)
 }
 
 // from https://cloud.google.com/batch/docs/samples/batch-create-script-job
diff --git a/dashboard/app/cron.yaml b/dashboard/app/cron.yaml
index a0acdf7d2f02..31a57974ef75 100644
--- a/dashboard/app/cron.yaml
+++ b/dashboard/app/cron.yaml
@@ -24,7 +24,13 @@ cron:
   schedule: every sunday 00:00
 # Update other coverage numbers every day.
 - url: /cron/batch_coverage?days=true&months=true&steps=10
-  schedule: every 24 hours
+  schedule: every day 00:00
+# Clean up the coverage DB every week.
+# We add data without transactions, so it is important to run the cleanup
+# while no batch_coverage job is in progress.
+# Delete at most 7 days * 3 namespaces * 3 new sessions/day * 2 (to also catch older leftovers) = 126.
+- url: /cron/batch_coverage_clean?sessions=126
+  schedule: every saturday 12:00
 # Export reproducers every week.
 - url: /cron/batch_db_export
   schedule: every saturday 00:00
diff --git a/tools/syz-covermerger/init_db.sh b/tools/syz-covermerger/init_db.sh
index d31f68e9ae8f..2af9a8aed23e 100755
--- a/tools/syz-covermerger/init_db.sh
+++ b/tools/syz-covermerger/init_db.sh
@@ -23,6 +23,9 @@ CREATE TABLE
   (session, filepath)
 );')
 gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
   --ddl="$create_table"
+echo "creating 'files' index"
+gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
+  --ddl="CREATE INDEX files_session ON files (session);"
 
 echo "drop table 'merge_history' if exists"
 gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
@@ -43,6 +46,9 @@ CREATE TABLE
   (namespace, repo, duration, dateto)
 );')
 gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
   --ddl="$create_table"
+echo "creating 'merge_history' index"
+gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
+  --ddl="CREATE INDEX merge_history_session ON merge_history (session);"
 
 echo "drop table 'file_subsystems' if exists"
 gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
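Since handleBatchCoverageClean validates the sessions parameter before creating a Spanner client, that path can be exercised without any database. Below is a minimal test sketch, assuming the handler lives in a package main alongside batch_coverage.go; the test itself is hypothetical and not part of this patch.

package main

import (
	"net/http"
	"net/http/httptest"
	"testing"
)

// TestBatchCoverageCleanBadSessions is a hypothetical test, not part of this
// patch. A non-integer 'sessions' value must fail strconv.Atoi and produce
// http.StatusBadRequest before any Spanner client is created.
func TestBatchCoverageCleanBadSessions(t *testing.T) {
	r := httptest.NewRequest(http.MethodGet, "/cron/batch_coverage_clean?sessions=abc", nil)
	w := httptest.NewRecorder()
	handleBatchCoverageClean(w, r)
	if w.Code != http.StatusBadRequest {
		t.Fatalf("status = %d, want %d", w.Code, http.StatusBadRequest)
	}
}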