Renamed all tables to fit preprint structure.

naobservatory · Dec 19, 2023 · cfb7629
1 parent 76968e0
commit cfb7629
Show file tree

Hide file tree

Showing 5 changed files with 11 additions and 26 deletions.
diff --git a/preprint_stats/cost_required_stat.py b/preprint_stats/supplement_table_10.py
@@ -77,7 +77,7 @@ def start():
         "spurbeck": "Spurbeck",
     }
     with open(
-        f"cost_estimates_${DOLLAR_PER_1B_READS}_per_b.tsv",
+        f"supplement_table_10.tsv",
         mode="w",
         newline="",
     ) as file:

diff --git a/preprint_stats/relative_abundance_table.py b/preprint_stats/supplement_table_6.py
@@ -25,14 +25,11 @@ def tidy_number(reads_required=int) -> str:
     if is_negative:
         exponent = exponent[1:]
 
-    # Remove the leading zero from the exponent if it's there
     exponent = exponent.lstrip("0")
 
-    # Add back the superscript minus if the exponent was negative
     if is_negative:
         exponent = "⁻" + exponent
 
-    # Now replace the digits with superscript characters
     superscript_map = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
     exponent = exponent.translate(superscript_map)
 
@@ -48,10 +45,6 @@ def read_data() -> dict[tuple[str, str, str, str], SummaryStats]:
             predictor_type = row["predictor_type"]
             study = row["study"]
             location = row["location"]
-            if (
-                virus == "AAV5"
-            ):  # FIXME: Remove this when AAV5 is dropped earlier.
-                continue
             data[virus, predictor_type, study, location] = SummaryStats(
                 mean=tidy_number(float(row["mean"])),
                 std=tidy_number(float(row["std"])),
@@ -82,7 +75,7 @@ def create_tsv():
 
     headers = ["Virus", "Study", "Median", "Lower", "Upper"]
 
-    with open("output_summary.tsv", "w", newline="") as file:
+    with open("supplement_table_6.tsv", "w", newline="") as file:
         writer = csv.DictWriter(file, fieldnames=headers, delimiter="\t")
         writer.writeheader()
 

diff --git a/preprint_stats/spurbeck_inter_site_comparison.py b/preprint_stats/supplement_table_7.py
@@ -60,7 +60,6 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
     ]
     gmean_variance = defaultdict(list)
     for virus in df["virus"].unique():
-        print(virus)
         if virus not in target_viruses:
             continue
         virus_df = df[df["virus"] == virus]
@@ -73,9 +72,10 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
                 gmean(non_enriched_virus_df[quantile].dropna()),
             )
             variance = float(enriched_gm - non_enriched_gm)
-            print(variance)
 
-            gmean_variance[f"variance_{quantile}"].append(round(variance, 2))
+            gmean_variance[f"Difference at {quantile}"].append(
+                round(variance, 2)
+            )
 
     return pd.DataFrame(gmean_variance)
 
@@ -85,7 +85,7 @@ def start():
 
     variance_df = compute_geo_mean_ratio(df_fits)
 
-    variance_df.to_csv("spurbeck_variance.tsv", sep="\t", index=False)
+    variance_df.to_csv("supplement_table_7.tsv", sep="\t", index=False)
 
 
 if __name__ == "__main__":

diff --git a/preprint_stats/rothman_inter_site_comparison.py b/preprint_stats/supplement_table_8.py
@@ -64,12 +64,11 @@ def compute_geo_mean_ratio(df: pd.DataFrame) -> pd.DataFrame:
         for quantile in PERCENTILES:
             non_htp_quantile_gm = (gmean(non_htp_df[quantile].dropna()),)
             htp_quantile = gmean(htp_df[quantile].dropna())
-            print(non_htp_quantile_gm, htp_quantile)
             variance = float(htp_quantile - non_htp_quantile_gm)
-            # print(variance)
-
-            gmean_variance[f"variance_{quantile}"].append(round(variance, 2))
 
+            gmean_variance[f"Difference at {quantile}"].append(
+                round(variance, 2)
+            )
     return pd.DataFrame(gmean_variance)
 
 
@@ -78,7 +77,7 @@ def start():
 
     variance_df = compute_geo_mean_ratio(df_fits)
 
-    variance_df.to_csv("rothman_variance.tsv", sep="\t", index=False)
+    variance_df.to_csv("supplement_table_8.tsv", sep="\t", index=False)
 
 
 if __name__ == "__main__":

diff --git a/preprint_stats/reads_required_stat.py b/preprint_stats/supplement_table_9.py
@@ -65,13 +65,10 @@ def tidy_number(reads_required=int) -> str:
 
     coefficient, exponent = sci_notation.split("e")
 
-    # Remove the leading '+' from the exponent
     exponent = exponent.replace("+", "")
-    # Remove the leading zero from the exponent if it's there
     if exponent.startswith("0") and len(exponent) > 1:
         exponent = exponent[1:]
 
-    # Now replace the digits with superscript characters
     exponent = (
         exponent.replace("0", "⁰")
         .replace("1", "¹")
@@ -88,10 +85,6 @@ def tidy_number(reads_required=int) -> str:
     return f"{coefficient} x 10{exponent}"
 
 
-# def tidy_number(reads_required=int) -> str:
-#     scientific_notation = "{:.2e} reads".format(round(reads_required))
-
-
 def start():
     data = read_data()
     TARGET_INCIDENCE = 0.01
@@ -102,7 +95,7 @@ def start():
         "rothman": "Rothman",
         "spurbeck": "Spurbeck",
     }
-    with open("read_estimates.tsv", mode="w", newline="") as file:
+    with open("supplement_table_9.tsv", mode="w", newline="") as file:
         tsv_writer = csv.writer(file, delimiter="\t")
         tsv_writer.writerow(
             [