Skip to content

Commit

Permalink
* Fixed bug where kallisto quantification would sometimes fail to sum transcripts to genes properly.
Browse files Browse the repository at this point in the history
  • Loading branch information
GuyTeichman committed Oct 3, 2024
1 parent 8f5da01 commit 770db8f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
6 changes: 6 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
History
=======

4.?.? (2024-??-??)
-------------------

Fixed
******
* Fixed bug where kallisto quantification would sometimes fail to sum transcripts to genes properly.

4.1.0 (2024-09-16)
-------------------
Expand Down
5 changes: 2 additions & 3 deletions rnalysis/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -2016,16 +2016,15 @@ def _sum_transcripts_to_genes(tpm: pl.DataFrame, counts: pl.DataFrame, gtf_path:
pl.exclude(counts.columns[0]).sum().truediv(10 ** 6)).collect()
tpm_cpy = tpm.lazy().join(transcript2gene, left_on=tpm.columns[0], right_on='Transcript ID',
how='left')
tpm_by_gene = tpm_cpy.drop(cs.first()).group_by('Gene ID').sum()
tpm_by_gene = tpm_cpy.drop(cs.first()).drop_nulls().group_by('Gene ID').sum()
count_per_gene = tpm_by_gene.with_columns(
[(pl.col(col) * library_sizes[col][0]).alias(col) for col in tpm.columns[1:]]).collect()
elif summation_method == 'raw':
count_cpy = counts.lazy().join(transcript2gene, left_on=tpm.columns[0],
right_on='Transcript ID', how='left')
count_per_gene = count_cpy.drop(cs.first()).group_by('Gene ID').sum().collect()
count_per_gene = count_cpy.drop(cs.first()).drop_nulls().group_by('Gene ID').sum().collect()
else:
raise ValueError(f"Invalid value for 'summation_method': '{summation_method}'.")

if len(count_per_gene) == 0:
continue
pbar.update(8)
Expand Down

0 comments on commit 770db8f

Please sign in to comment.