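Fixed type issues: added `# type: ignore` to the scipy `gmean` imports, replaced the hand-written column dicts with `defaultdict(list)`, and removed leftover debug prints.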

naobservatory · Dec 2, 2023 · c0f5185 · c0f5185
1 parent f989802
commit c0f5185
Show file tree

Hide file tree

Showing 5 changed files with 7 additions and 22 deletions.
diff --git a/preprint_stats/cost_required_stat.py b/preprint_stats/cost_required_stat.py
@@ -2,7 +2,7 @@
 
 import csv
 from dataclasses import dataclass
-from scipy.stats import gmean
+from scipy.stats import gmean  # type: ignore
 import numpy as np
 
 PERCENTILES = [5, 25, 50, 75, 95]

diff --git a/preprint_stats/preprint_stats.py b/preprint_stats/preprint_stats.py
@@ -2,7 +2,7 @@
 import json
 import subprocess
 import numpy as np
-from scipy.stats import gmean
+from scipy.stats import gmean  # type: ignore
 
 
 dashboard = os.path.expanduser("~/code/mgs-pipeline/dashboard/")

diff --git a/preprint_stats/reads_required_stat.py b/preprint_stats/reads_required_stat.py
@@ -2,7 +2,7 @@
 
 import csv
 from dataclasses import dataclass
-from scipy.stats import gmean
+from scipy.stats import gmean  # type: ignore
 
 import numpy as np
 

diff --git a/preprint_stats/rothman_inter_site_comparison.py b/preprint_stats/rothman_inter_site_comparison.py
@@ -1,6 +1,6 @@
 import csv
 import pandas as pd
-from scipy.stats import gmean
+from scipy.stats import gmean  # type: ignore
 from math import log
 from collections import defaultdict
 
@@ -13,12 +13,7 @@ def reads_df() -> pd.DataFrame:
 
 
 def rothman_fits_data() -> pd.DataFrame:
-    data = {
-        "predictor_type": [],
-        "virus": [],
-        "study": [],
-        "location": [],
-    }
+    data = defaultdict(list)
     for p in PERCENTILES:
         data[f"{p}"] = []
 
@@ -63,9 +58,7 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
         for quantile in PERCENTILES:
             non_htp_quantile_gm = (gmean(non_htp_df[quantile].dropna()),)
             htp_quantile = gmean(htp_df[quantile].dropna())
-            print(non_htp_quantile_gm, htp_quantile)
             variance = float(htp_quantile - non_htp_quantile_gm)
-            # print(variance)
 
             gmean_variance[f"variance_{quantile}"].append(round(variance, 2))
 

diff --git a/preprint_stats/spurbeck_inter_site_comparison.py b/preprint_stats/spurbeck_inter_site_comparison.py
@@ -1,6 +1,6 @@
 import csv
 import pandas as pd
-from scipy.stats import gmean
+from scipy.stats import gmean  # type: ignore
 from math import log
 from collections import defaultdict
 
@@ -13,13 +13,7 @@ def reads_df() -> pd.DataFrame:
 
 
 def spurbeck_fits_data() -> pd.DataFrame:
-    data = {
-        "predictor_type": [],
-        "virus": [],
-        "study": [],
-        "location": [],
-        "enriched": [],
-    }
+    data = defaultdict(list)
     for p in PERCENTILES:
         data[f"{p}"] = []
 
@@ -59,7 +53,6 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
     ]
     gmean_variance = defaultdict(list)
     for virus in df["virus"].unique():
-        print(virus)
         if virus not in target_viruses:
             continue
         virus_df = df[df["virus"] == virus]
@@ -72,7 +65,6 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
                 gmean(non_enriched_virus_df[quantile].dropna()),
             )
             variance = float(enriched_gm - non_enriched_gm)
-            print(variance)
 
             gmean_variance[f"variance_{quantile}"].append(round(variance, 2))