Skip to content

Commit

Permalink
dashboard/app: periodically remove coverage garbage from spanner
Browse files Browse the repository at this point in the history
  • Loading branch information
tarasmadan committed Nov 29, 2024
1 parent 52b38cc commit 23a3568
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 1 deletion.
60 changes: 60 additions & 0 deletions dashboard/app/batch_coverage.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ import (
"context"
"fmt"
"net/http"
"os"
"strconv"

"cloud.google.com/go/batch/apiv1/batchpb"
"cloud.google.com/go/bigquery"
"cloud.google.com/go/civil"
"cloud.google.com/go/spanner"
"github.com/google/syzkaller/pkg/coveragedb"
"google.golang.org/api/iterator"
"google.golang.org/appengine/v2"
Expand Down Expand Up @@ -151,3 +153,61 @@ func nsDataAvailable(ctx context.Context, ns string) ([]coveragedb.TimePeriod, [
}
return periods, recordsCount, nil
}

// handleBatchCoverageClean is the HTTP handler that removes abandoned coverage
// records from the coverage DB and reports the number of deleted rows.
//
// The optional "sessions" form value limits how many abandoned sessions are
// deleted per call. It defaults to 10 when the parameter is absent or empty.
// A non-integer value is answered with 400, a cleanup failure with 500.
//
// Abandoned DB cleanup is expensive.
// Every namespace creates min 3 sessions every day (yesterday, today, this month).
// It means every day we create 3 garbage sessions per namespace.
// Every session is 5-10k file records.
// To clean up everything once/week we have to delete at least
// 7 * 3 * count(namespaces) sessions.
func handleBatchCoverageClean(w http.ResponseWriter, r *http.Request) {
	paramSessions := r.FormValue("sessions")
	if paramSessions == "" {
		paramSessions = "10"
	}
	// Parse the defaulted value, not the raw form value: otherwise the default
	// above is never applied and an empty parameter is rejected as malformed.
	maxSessionsToDel, err := strconv.Atoi(paramSessions)
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte("failed to parse 'sessions', integer expected"))
		return
	}

	deletedRows, err := cleanCoverageDB(maxSessionsToDel)
	if err != nil {
		w.WriteHeader(http.StatusInternalServerError)
		fmt.Fprintf(w, "failed to cleanCoverageDB: %s", err.Error())
		return
	}
	fmt.Fprintf(w, "cleanCoverageDB deleted %d rows", deletedRows)
}

func cleanCoverageDB(maxSessionsToDelete int) (int64, error) {
ctx := context.Background()
client, err := spanner.NewClient(ctx, os.Getenv("GOOGLE_CLOUD_PROJECT"))
if err != nil {
return 0, fmt.Errorf("failed to coveragedb.NewClient: %w", err)
}
defer client.Close()
var rowCount int64

_, err = client.ReadWriteTransaction(ctx, func(ctx context.Context, txn *spanner.ReadWriteTransaction) error {
stmt := spanner.Statement{
SQL: fmt.Sprintf(` delete from files
where files.session in (
select
distinct(files.session)
from files
left join merge_history
on files.session = merge_history.session
where merge_history.session is NULL
limit %d
);`, maxSessionsToDelete),
}
var err error
rowCount, err = txn.Update(ctx, stmt)
if err != nil {
return fmt.Errorf("txn.Update: %w", err)
}
return nil
})
return rowCount, err
}
1 change: 1 addition & 0 deletions dashboard/app/batch_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
// initBatchProcessors registers the batch cron endpoints on the default
// HTTP mux.
func initBatchProcessors() {
	routes := []struct {
		pattern string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/cron/batch_coverage", handleBatchCoverage},
		{"/cron/batch_db_export", handleBatchDBExport},
		{"/cron/batch_coverage_clean", handleBatchCoverageClean},
	}
	for _, route := range routes {
		http.HandleFunc(route.pattern, route.handler)
	}
}

// from https://cloud.google.com/batch/docs/samples/batch-create-script-job
Expand Down
8 changes: 7 additions & 1 deletion dashboard/app/cron.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,13 @@ cron:
schedule: every sunday 00:00
# Update other coverage numbers every day.
- url: /cron/batch_coverage?days=true&months=true&steps=10
schedule: every 24 hours
schedule: every day 00:00
# Clean up coverage db every week.
# Coverage data is added without transactions, so it is important to run the
# clean operation when no batch_coverage job is in progress.
# Delete at most 7 days * 3 namespaces * 3 new_sessions/day * 2 (to also remove older garbage) = 126 sessions.
- url: /cron/batch_coverage_clean?sessions=126
schedule: every saturday 12:00
# Export reproducers every week.
- url: /cron/batch_db_export
schedule: every saturday 00:00
6 changes: 6 additions & 0 deletions tools/syz-covermerger/init_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ CREATE TABLE
(session, filepath) );')
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="$create_table"
# Add the secondary index files_session on the session column of 'files'.
echo "creating 'files' index"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="CREATE INDEX files_session ON files (session);"

echo "drop table 'merge_history' if exists"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
Expand All @@ -43,6 +46,9 @@ CREATE TABLE
(namespace, repo, duration, dateto) );')
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
--ddl="$create_table"
# Add the secondary index merge_history_session on the session column of
# 'merge_history'. The index must target merge_history, not files: 'files'
# gets its own files_session index on the same column.
echo "creating 'merge_history' index"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
  --ddl="CREATE INDEX merge_history_session ON merge_history (session);"

echo "drop table 'file_subsystems' if exists"
gcloud spanner databases ddl update $db --instance=syzbot --project=syzkaller \
Expand Down

0 comments on commit 23a3568

Please sign in to comment.