Skip to content

Commit

Permalink
* When running differential expression or feature counting, RNAlysis …
Browse files Browse the repository at this point in the history
…session reports will automatically include a logfile with R session info.

* Added optional parameters to all differential expression functions, allowing users to return a path to a logfile with R session info.
  • Loading branch information
GuyTeichman committed Sep 16, 2024
1 parent 17f6e8f commit 4ce0e72
Show file tree
Hide file tree
Showing 28 changed files with 286 additions and 62 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Changed
* Made small improvements to the RNAlysis graphical interface.
* RNAlysis and its dependencies now run on Numpy 2 instead of Numpy 1.
* RNAlysis now uses a different implementation of the K-Medoids clustering algorithm, which should be more stable and faster than the previous implementation. However, note that the two implementations may give slightly different results.
* When running differential expression or feature counting, RNAlysis session reports will automatically include a logfile with R session info.
* Added an optional 'return_log' parameter to all differential expression and featureCounts functions, which makes them additionally return the path to a logfile with R session info.

Fixed
******
Expand Down
5 changes: 5 additions & 0 deletions rnalysis/data_files/r_templates/logging.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Template fragment: start logging for the generated R script.
# NOTE(review): "$LOGFILE" looks like a placeholder that is substituted with the
# real logfile path before the script is run - confirm against the caller.
# Open a connection to the log file in append mode
logfile <- file("$LOGFILE", open = "a")
# Divert standard output to the log file; split = TRUE keeps echoing it to the console as well.
# Only the output stream is diverted here - messages and warnings (stderr) are not captured by this call.
sink(logfile, append = TRUE, split = TRUE)

4 changes: 4 additions & 0 deletions rnalysis/data_files/r_templates/sessioninfo_run.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Template fragment: report R session info at the end of the run.
# Get session info (R version, platform, attached and loaded packages)
sessionInfo()
# Remove the most recent output diversion so that output is no longer redirected
# NOTE(review): sink() does not close a file connection that was passed to an
# earlier sink() call - confirm whether an explicit close() is wanted.
sink()
30 changes: 20 additions & 10 deletions rnalysis/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,7 +804,9 @@ def featurecounts_single_end(input_folder: Union[str, Path], output_folder: Unio
ignore_secondary: bool = True,
count_fractionally: bool = False, is_long_read: bool = False,
report_read_assignment: Union[Literal['bam', 'sam', 'core'], None] = None,
threads: PositiveInt = 1) -> Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame]:
threads: PositiveInt = 1, return_log: bool = False) -> Union[
Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame],
Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame, Path]]:
"""
Assign mapped single-end sequencing reads to specified genomic features using \
`RSubread featureCounts <https://doi.org/10.1093/bioinformatics/btt656>`_.
Expand Down Expand Up @@ -860,6 +862,9 @@ def featurecounts_single_end(input_folder: Union[str, Path], output_folder: Unio
:type report_read_assignment: 'bam', 'sam', 'core', or None (default=None)
:param threads: number of threads to run featureCounts on. More threads will generally make feature counting faster.
:type threads: int > 0 (default=1)
:param return_log: if True, the function will return the path to the analysis logfile, \
which includes session info.
:type return_log: bool (default=False)
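:return: a count matrix (CountFilter) containing feature counts for all input files, \
a DataFrame summarizing the features reads were aligned to, and a DataFrame summarizing the alignment statistics. \
If return_log is True, the path to the analysis logfile is returned as a fourth element.
:rtype: (filtering.CountFilter, pl.DataFrame, pl.DataFrame), or \
(filtering.CountFilter, pl.DataFrame, pl.DataFrame, Path) if return_log is True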
Expand All @@ -873,9 +878,9 @@ def featurecounts_single_end(input_folder: Union[str, Path], output_folder: Unio

new_sample_names = _featurecounts_get_sample_names(kwargs['files'], new_sample_names)

feature_counting.run_featurecounts_analysis(kwargs, output_folder, r_installation_folder)
counts, annotation, stats = _process_featurecounts_output(output_folder, new_sample_names)
return counts, annotation, stats
feature_counting.FeatureCountsRunner(kwargs, output_folder, r_installation_folder).run()
counts, annotation, stats, log_path = _process_featurecounts_output(output_folder, new_sample_names)
return (counts, annotation, stats, log_path) if return_log else (counts, annotation, stats)


@_func_type('paired')
Expand All @@ -891,7 +896,9 @@ def featurecounts_paired_end(input_folder: Union[str, Path], output_folder: Unio
count_chimeric_fragments: bool = False, min_fragment_length: NonNegativeInt = 50,
max_fragment_length: Union[PositiveInt, None] = 600,
report_read_assignment: Union[Literal['bam', 'sam', 'core'], None] = None,
threads: PositiveInt = 1) -> Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame]:
threads: PositiveInt = 1, return_log: bool = False) -> Union[
Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame],
Tuple[filtering.CountFilter, pl.DataFrame, pl.DataFrame, Path]]:
"""
Assign mapped paired-end sequencing reads to specified genomic features using \
`RSubread featureCounts <https://doi.org/10.1093/bioinformatics/btt656>`_. \
Expand Down Expand Up @@ -961,6 +968,9 @@ def featurecounts_paired_end(input_folder: Union[str, Path], output_folder: Unio
:type max_fragment_length: int > 0 or None (default=600)
:param threads: number of threads to run featureCounts on. More threads will generally make feature counting faster.
:type threads: int > 0 (default=1)
:param return_log: if True, the function will return the path to the analysis logfile, \
which includes session info.
:type return_log: bool (default=False)
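:return: a count matrix (CountFilter) containing feature counts for all input files, \
a DataFrame summarizing the features reads were aligned to, and a DataFrame summarizing the alignment statistics. \
If return_log is True, the path to the analysis logfile is returned as a fourth element.
:rtype: (filtering.CountFilter, pl.DataFrame, pl.DataFrame), or \
(filtering.CountFilter, pl.DataFrame, pl.DataFrame, Path) if return_log is True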
Expand All @@ -978,10 +988,9 @@ def featurecounts_paired_end(input_folder: Union[str, Path], output_folder: Unio
kwargs.update(paired_kwargs)

new_sample_names = _featurecounts_get_sample_names(kwargs['files'], new_sample_names)

feature_counting.run_featurecounts_analysis(kwargs, output_folder, r_installation_folder)
counts, annotation, stats = _process_featurecounts_output(output_folder, new_sample_names)
return counts, annotation, stats
feature_counting.FeatureCountsRunner(kwargs, output_folder, r_installation_folder).run()
counts, annotation, stats, log_path = _process_featurecounts_output(output_folder, new_sample_names)
return (counts, annotation, stats, log_path) if return_log else (counts, annotation, stats)


def _parse_featurecounts_misc_args(input_folder: Union[str, Path], output_folder: Path, gtf_file: Union[str, Path],
Expand Down Expand Up @@ -1034,6 +1043,7 @@ def _process_featurecounts_output(output_folder, new_sample_names):
counts_path = Path(output_folder).joinpath('featureCounts_counts.csv')
annotation_path = Path(output_folder).joinpath('featureCounts_annotation.csv')
stats_path = Path(output_folder).joinpath('featureCounts_stats.csv')
log_path = Path(output_folder).joinpath('logfile.log')

counts = filtering.CountFilter(counts_path)
counts.df = counts.df.rename(
Expand All @@ -1047,7 +1057,7 @@ def _process_featurecounts_output(output_folder, new_sample_names):
stats = stats.rename({oldname: newname for oldname, newname in zip(stats.columns[1:], new_sample_names)})
io.save_table(stats, stats_path) # re-save to reflect changes in column names

return counts, annotation, stats
return counts, annotation, stats, log_path


@readable_name('Bowtie2 build index')
Expand Down
48 changes: 37 additions & 11 deletions rnalysis/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -3046,8 +3046,8 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
r_installation_folder: Union[str, Path, Literal['auto']] = 'auto',
output_folder: Union[str, Path, None] = None,
random_effect: Union[str, None] = None, quality_weights: bool = False,
return_design_matrix: bool = False, return_code: bool = False) -> \
Tuple['DESeqFilter', ...]:
return_design_matrix: bool = False, return_code: bool = False,
return_log: bool = False) -> Tuple['DESeqFilter', ...]:
"""
Run differential expression analysis on the count matrix using the \
`Limma-Voom <https://doi.org/10.1186/gb-2014-15-2-r29>`_ pipeline. \
Expand Down Expand Up @@ -3095,8 +3095,12 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
:type quality_weights: bool (default=False)
:param return_design_matrix: if True, the function will return the sanitized design matrix used in the analysis.
:type return_design_matrix: bool (default=False)
:param return_code: if True, the function will return the R script used to generate the analysis results.
:param return_code: if True, the function will return the path to the R script \
used to generate the analysis results.
:type return_code: bool (default=False)
:param return_log: if True, the function will return the path to the analysis logfile, \
which includes session info.
:type return_log: bool (default=False)
:return: a tuple of DESeqFilter objects, one for each comparison
"""
if output_folder is not None:
Expand Down Expand Up @@ -3127,6 +3131,7 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
lrt_factors, model_factors, r_installation_folder,
random_effect, quality_weights).run()
code_path = None
log_path = None
outputs = []
for item in r_output_dir.iterdir():
if not item.is_file():
Expand All @@ -3135,6 +3140,8 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
outputs.append(DESeqFilter(item, log2fc_col='logFC', padj_col='adj.P.Val', pval_col='P.Value'))
elif item.suffix == '.R':
code_path = item
elif item.suffix == '.log':
log_path = item
if output_folder is not None:
with open(item) as infile, open(output_folder.joinpath(item.name), 'w') as outfile:
outfile.write(infile.read())
Expand All @@ -3146,6 +3153,11 @@ def differential_expression_limma_voom(self, design_matrix: Union[str, Path],
warnings.warn("No R script was generated during the analysis")
else:
return_val = [return_val, code_path]
if return_log:
if log_path is None:
warnings.warn("No log file was generated during the analysis")
else:
return_val = [return_val, log_path]
return return_val

@readable_name('Run Limma-Voom differential expression (simplified mode)')
Expand All @@ -3155,8 +3167,8 @@ def differential_expression_limma_voom_simplified(self, design_matrix: Union[str
output_folder: Union[str, Path, None] = None,
random_effect: Union[str, None] = None,
quality_weights: bool = False,
return_design_matrix: bool = False, return_code: bool = False
) -> Tuple['DESeqFilter', ...]:
return_design_matrix: bool = False, return_code: bool = False,
return_log: bool = False) -> Tuple['DESeqFilter', ...]:
"""
Run differential expression analysis on the count matrix using the \
`Limma-Voom <https://doi.org/10.1186/gb-2014-15-2-r29>`_ pipeline. \
Expand Down Expand Up @@ -3204,7 +3216,7 @@ def differential_expression_limma_voom_simplified(self, design_matrix: Union[str
output_folder=output_folder, random_effect=random_effect,
quality_weights=quality_weights,
return_design_matrix=return_design_matrix,
return_code=return_code)
return_code=return_code, return_log=return_log)

@readable_name('Run DESeq2 differential expression')
def differential_expression_deseq2(self, design_matrix: Union[str, Path],
Expand All @@ -3216,7 +3228,8 @@ def differential_expression_deseq2(self, design_matrix: Union[str, Path],
output_folder: Union[str, Path, None] = None, return_design_matrix: bool = False,
scaling_factors: Union[str, Path, None] = None,
cooks_cutoff: bool = True,
return_code: bool = False) -> Tuple['DESeqFilter', ...]:
return_code: bool = False, return_log: bool = False
) -> Tuple['DESeqFilter', ...]:
"""
Run differential expression analysis on the count matrix using the \
`DESeq2 <https://doi.org/10.1186/s13059-014-0550-8>`_ algorithm. \
Expand Down Expand Up @@ -3259,8 +3272,12 @@ def differential_expression_deseq2(self, design_matrix: Union[str, Path],
:type output_folder: str, Path, or None
:param return_design_matrix: if True, the function will return the sanitized design matrix used in the analysis.
:type return_design_matrix: bool (default=False)
:param return_code: if True, the function will return the R script used to generate the analysis results.
:param return_code: if True, the function will return the path to the R script \
used to generate the analysis results.
:type return_code: bool (default=False)
:param return_log: if True, the function will return the path to the analysis logfile, \
which includes session info.
:type return_log: bool (default=False)
:return: a tuple of DESeqFilter objects, one for each comparison
"""
if output_folder is not None:
Expand Down Expand Up @@ -3319,13 +3336,16 @@ def differential_expression_deseq2(self, design_matrix: Union[str, Path],
scale_factor_path, cooks_cutoff, scale_factor_ndims).run()
outputs = []
code_path = None
log_path = None
for item in r_output_dir.iterdir():
if not item.is_file():
continue
if item.suffix == '.csv':
outputs.append(DESeqFilter(item))
if item.suffix == '.R':
code_path = item
elif item.suffix == '.log':
log_path = item
if output_folder is not None:
with open(item) as infile, open(output_folder.joinpath(item.name), 'w') as outfile:
outfile.write(infile.read())
Expand All @@ -3337,15 +3357,20 @@ def differential_expression_deseq2(self, design_matrix: Union[str, Path],
warnings.warn("No R script was generated during the analysis")
else:
return_val = [return_val, code_path]
if return_log:
if log_path is None:
warnings.warn("No log file was generated during the analysis")
else:
return_val = [return_val, log_path]
return return_val

@readable_name('Run DESeq2 differential expression (simplified mode)')
def differential_expression_deseq2_simplified(self, design_matrix: Union[str, Path],
comparisons: Iterable[Tuple[str, str, str]],
r_installation_folder: Union[str, Path, Literal['auto']] = 'auto',
output_folder: Union[str, Path, None] = None,
return_design_matrix: bool = False, return_code: bool = False
) -> Tuple['DESeqFilter', ...]:
return_design_matrix: bool = False, return_code: bool = False,
return_log: bool = False) -> Tuple['DESeqFilter', ...]:
"""
Run differential expression analysis on the count matrix using the \
`DESeq2 <https://doi.org/10.1186/s13059-014-0550-8>`_ algorithm. \
Expand Down Expand Up @@ -3385,7 +3410,8 @@ def differential_expression_deseq2_simplified(self, design_matrix: Union[str, Pa
return self.differential_expression_deseq2(design_matrix, comparisons,
r_installation_folder=r_installation_folder,
output_folder=output_folder,
return_design_matrix=return_design_matrix, return_code=return_code)
return_design_matrix=return_design_matrix, return_code=return_code,
return_log=return_log)

@readable_name('Calculate fold change')
def fold_change(self, numerator: param_typing.ColumnNames, denominator: param_typing.ColumnNames,
Expand Down
3 changes: 2 additions & 1 deletion rnalysis/gui/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ def __init__(self, parent=None):

class DiffExpWindow(gui_windows.FuncExternalWindow):
EXCLUDED_PARAMS = {'self', 'comparisons', 'covariates', 'lrt_factors', 'model_factors', 'return_design_matrix',
'return_code'}
'return_code', 'return_log'}
IGNORED_WIDGETS = gui_windows.FuncExternalWindow.IGNORED_WIDGETS | {'load_design'}

__slots__ = {'comparisons': 'list of comparisons to make',
Expand Down Expand Up @@ -438,6 +438,7 @@ def get_analysis_kwargs(self):
kwargs = super().get_analysis_kwargs()
kwargs['return_design_matrix'] = True
kwargs['return_code'] = True
kwargs['return_log'] = True

kwargs['comparisons'] = self.comparisons_widgets['picker'].get_comparison_values()
if not self.simplified:
Expand Down
Loading

0 comments on commit 4ce0e72

Please sign in to comment.