Skip to content

Commit

Permalink
swapped new and old labels in one line of the code
Browse files Browse the repository at this point in the history
  • Loading branch information
simonleandergrimm committed Jul 9, 2024
1 parent 5830dd1 commit 3e49417
Showing 1 changed file with 7 additions and 21 deletions.
28 changes: 7 additions & 21 deletions scripts/2024-07-09-print-clade-count-comparison.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import defaultdict

BIOPROJECT_DIR = "bioprojects"
Expand Down Expand Up @@ -38,15 +35,6 @@
]


def read_clade_counts(file_path):
    """Return the Viruses clade read count for each sample in a TSV file.

    The file is read as a tab-separated table that must contain at least
    the columns ``taxid``, ``sample`` and ``n_reads_clade``. Only rows whose
    ``taxid`` equals 10239 (Viruses) are kept.

    Args:
        file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        A dict mapping each sample to its ``n_reads_clade`` value. If a
        sample occurs more than once, the last row wins. The dict is empty
        when no row has taxid 10239.
    """
    df = pd.read_csv(file_path, sep="\t")
    # Select both columns with a list; a bare tuple key would be looked up
    # as a single column label and raise KeyError.
    viruses = df.loc[df["taxid"] == 10239, ["sample", "n_reads_clade"]]
    # Pair the columns directly instead of itertuples(), which also yields
    # the row index and therefore cannot be unpacked into two names.
    return dict(
        zip(viruses["sample"].tolist(), viruses["n_reads_clade"].tolist())
    )


def collect_data():
data = defaultdict(lambda: defaultdict(dict))
for study, bioprojects in TARGET_STUDY_METADATA.items():
Expand All @@ -68,8 +56,7 @@ def collect_data():
n_reads_direct,
n_reads_clade,
) = line.strip().split("\t")
# if taxid == "10239": # Check if taxid is 10239 (Viruses)
data[sample][name]["old"] = int(n_reads_clade)
data[sample][name]["new"] = int(n_reads_clade)
with open(old_file, "r") as f:
next(f)
for line in f:
Expand All @@ -82,19 +69,18 @@ def collect_data():
n_reads_direct,
n_reads_clade,
) = line.strip().split("\t")
# if taxid == "10239": # Check if taxid is 10239 (Viruses)
data[sample][name]["new"] = int(n_reads_clade)
data[sample][name]["old"] = int(n_reads_clade)

differences = defaultdict(lambda: defaultdict(list))
for sample in data.keys():
for name in data[sample].keys():
new_count = data[sample][name].get("new", 0)
old_count = data[sample][name].get("old", 0)
differences[sample][name] = int(new_count - old_count)
return differences
return data, differences


differences = collect_data()
data, differences = collect_data()

df = pd.DataFrame(differences).transpose()

Expand All @@ -105,9 +91,9 @@ def collect_data():
row = df.loc[sample]
sorted_row = row.abs().sort_values(ascending=False)

top_5 = sorted_row.head(15)
top_15 = sorted_row.head(15)

print(f"\nSample: {sample}")
for name in top_5.index:
for name in top_15.index:
value = row[name]
print(f"{name}: {value}")
print(f"{name}: {value} ({data[sample][name].get('new', 0)})")

0 comments on commit 3e49417

Please sign in to comment.