diff --git a/.circleci/config.yml b/.circleci/config.yml index da16d5e89..c10ee7e2c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,7 +13,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -23,7 +23,7 @@ jobs: pip install -e .[tests] fi - save_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py38 @@ -34,7 +34,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Running unit tests command: | @@ -56,7 +56,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py39-v2-{{ checksum "pyproject.toml" }} + key: conda-py39-v3-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -75,7 +75,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py39 - save_cache: - key: conda-py39-v2-{{ checksum "pyproject.toml" }} + key: conda-py39-v3-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py39 - persist_to_workspace: @@ -90,7 +90,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py310-v1-{{ checksum "pyproject.toml" }} + key: conda-py310-v3-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -109,7 +109,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py310 - save_cache: - key: conda-py310-v1-{{ checksum "pyproject.toml" }} + key: conda-py310-v3-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py310 - persist_to_workspace: @@ -124,7 +124,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py311-v1-{{ checksum "pyproject.toml" }} + key: conda-py311-v3-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ 
-143,7 +143,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py311 - save_cache: - key: conda-py311-v1-{{ checksum "pyproject.toml" }} + key: conda-py311-v3-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py311 - persist_to_workspace: @@ -158,7 +158,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py312-v1-{{ checksum "pyproject.toml" }} + key: conda-py312-v3-{{ checksum "pyproject.toml" }} - run: name: Generate environment command: | @@ -177,7 +177,7 @@ jobs: mkdir /tmp/src/coverage mv /tmp/src/tedana/.coverage /tmp/src/coverage/.coverage.py312 - save_cache: - key: conda-py312-v1-{{ checksum "pyproject.toml" }} + key: conda-py312-v3-{{ checksum "pyproject.toml" }} paths: - /opt/conda/envs/tedana_py312 - persist_to_workspace: @@ -192,7 +192,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Style check command: | @@ -208,7 +208,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m @@ -233,7 +233,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m @@ -258,7 +258,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m @@ -283,7 +283,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m @@ -308,7 +308,7 @@ jobs: steps: - checkout - restore_cache: - key: conda-py38-v2-{{ 
checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Run integration tests no_output_timeout: 40m @@ -335,7 +335,7 @@ jobs: at: /tmp - checkout - restore_cache: - key: conda-py38-v2-{{ checksum "pyproject.toml" }} + key: conda-py38-v3-{{ checksum "pyproject.toml" }} - run: name: Merge coverage files command: | diff --git a/docs/approach.rst b/docs/approach.rst index 29d341d62..3091f30ec 100644 --- a/docs/approach.rst +++ b/docs/approach.rst @@ -334,6 +334,8 @@ Next, ``tedana`` applies TE-dependent independent component analysis (ICA) in order to identify and remove TE-independent (i.e., non-BOLD noise) components. The dimensionally reduced optimally combined data are first subjected to ICA in order to fit a mixing matrix to the whitened data. +``tedana`` can use a single iteration of FastICA or multiple iterations of robustICA, +with an explanation of those approaches `in our FAQ`_. This generates a number of independent timeseries (saved as **desc-ICA_mixing.tsv**), as well as parameter estimate maps which show the spatial loading of these components on the brain (**desc-ICA_components.nii.gz**). @@ -380,6 +382,8 @@ yielding a denoised timeseries, which is saved as **desc-denoised_bold.nii.gz**. .. image:: /_static/a15_denoised_data_timeseries.png +.. _in our FAQ: faq.html#tedana-what-is-the-right-number-of-ica-components-what-options-let-me-get-it +.. _These decision trees are detailed here: included_decision_trees.html ******************************* Manual classification with RICA diff --git a/docs/faq.rst b/docs/faq.rst index 7a8cd79f8..046ff7ef6 100644 --- a/docs/faq.rst +++ b/docs/faq.rst @@ -93,11 +93,78 @@ The TEDICA step may fail to converge if TEDPCA is either too strict With updates to the ``tedana`` code, this issue is now rare, but it may happen when preprocessing has not been applied to the data, or when improper steps have been applied to the data (e.g. rescaling, nuisance regression).
+It can also still happen when everything is seemingly correct +(see the answer to the next question). If you are confident that your data have been preprocessed correctly prior to applying tedana, and you encounter this problem, please submit a question to `NeuroStars`_. .. _NeuroStars: https://neurostars.org +********************************************************************************* +[tedana] What is the right number of ICA components & what options let me get it? +********************************************************************************* + +Part of the PCA step in ``tedana`` processing involves identifying the number of +components that contain meaningful signal. +The PCA components are then used to calculate the same number of ICA components. +The ``--tedpca`` option includes several options to identify the "correct" number +of PCA components. +``kundu`` and ``kundu-stabilize`` use several echo-based criteria to exclude PCA +components that are unlikely to contain T2* or S0 signal. +``mdl`` (conservative & fewest components), ``kic``, +& ``aic`` (liberal & more components) use `MAPCA`_. +Within the same general method, each uses a cost function to find a minimum +where more components no longer model meaningful variance. +For some datasets we see all methods fail and result in too few or too many components. +There is no consistent number of components or % variance explained to define the correct number. +The correct number of components will depend on the noise levels of the data. +For example, smaller voxels will result in more thermal noise and less total variance explained. +A dataset with more head motion artifacts will have more variance explained, +since more structured signal is within the head motion artifacts. +The clear failure cases are extreme. That is getting less than 1/5 the number of components +compared to time points or having nearly as many components as time points.
+We are working on identifying why this happens and adding better solutions. +Our current guess is that most of the above methods assume data are +independent and identically distributed (IID), +and signal leakage from in-slice and multi-slice acceleration may violate this assumption. + +We have one option that is generally useful and is also a partial solution. +``--ica_method robustica`` will run `robustica`_. +This is a method that, for a given number of PCA components, +will repeatedly run ICA and identify components that are stable across iterations. +While running ICA multiple times will slow processing, as a general benefit, +this means that the ICA results are less sensitive to the initialization parameters, +computer hardware, and software versions. +This will result in better stability and replicability of ICA results. +Additionally, `robustica`_ almost always results in fewer components than initially prescribed, +since there are fewer stable components across iterations than the total number of components. +This means, even if the initial PCA component estimate is a bit off, +the number of resulting robust ICA components will represent stable information in the data. +For a dataset where the PCA component estimation methods are failing, +one could use ``--tedpca`` with a fixed integer for a constant number of components, +that is on the high end of the typical number of components for a study, +and then `robustica`_ will reduce the number of components to only find stable information. +That said, if the fixed PCA component number is too high, +then the method will have too many unstable components, +and if the fixed PCA component number is too low, then there will be even fewer ICA components. +With this approach, the number of ICA components is more consistent, +but is still sensitive to the initial number of PCA components.
+For example, for a single dataset 60 PCA components might result in 46 stable ICA components, +while 55 PCA components might result in 43 stable ICA components. +We are still testing how these interact to give better recommendations for even more stable results. +While the TEDANA developers expect that ``--ica_method robustica`` may become +the default configuration in future TEDANA versions, +it is first being released to the public as a non-default option +in hope of gaining insight into its behaviour +across a broader range of multi-echo fMRI data. +If users are having trouble with PCA component estimation failing on a dataset, +we recommend using RobustICA; +and we invite users to send us feedback on its behavior and efficacy. + + +.. _MAPCA: https://github.com/ME-ICA/mapca +.. _robustica: https://github.com/CRG-CNAG/robustica + .. _manual classification: ******************************************************************************** diff --git a/pyproject.toml b/pyproject.toml index b6e7be9f2..086c5b302 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,6 +30,7 @@ dependencies = [ "pandas>=2.0,<=2.2.2", "pybtex", "pybtex-apa-style", + "robustica>=0.1.4,<=0.1.4", "scikit-learn>=0.21, <=1.5.2", "scipy>=1.2.0, <=1.14.1", "threadpoolctl", diff --git a/tedana/config.py b/tedana/config.py new file mode 100644 index 000000000..746d41637 --- /dev/null +++ b/tedana/config.py @@ -0,0 +1,19 @@ +"""Setting default values for ICA decomposition.""" + +DEFAULT_ICA_METHOD = "fastica" +DEFAULT_N_MAX_ITER = 500 +DEFAULT_N_MAX_RESTART = 10 +DEFAULT_SEED = 42 + + +"""Setting values for number of robust runs.""" + +DEFAULT_N_ROBUST_RUNS = 30 +MIN_N_ROBUST_RUNS = 5 +MAX_N_ROBUST_RUNS = 500 +WARN_N_ROBUST_RUNS = 200 + + +"""Setting the warning threshold for the index quality.""" + +WARN_IQ = 0.6 diff --git a/tedana/decomposition/ica.py b/tedana/decomposition/ica.py index b1e88e908..6ab291c43 100644 --- a/tedana/decomposition/ica.py +++ b/tedana/decomposition/ica.py @@ -4,15
+4,33 @@ import warnings import numpy as np +from robustica import RobustICA from scipy import stats from sklearn.decomposition import FastICA +from tedana.config import ( + DEFAULT_ICA_METHOD, + DEFAULT_N_MAX_ITER, + DEFAULT_N_MAX_RESTART, + DEFAULT_N_ROBUST_RUNS, + WARN_IQ, + WARN_N_ROBUST_RUNS, +) + LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") -def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): - """Perform ICA on ``data`` and return mixing matrix. +def tedica( + data, + n_components, + fixed_seed, + ica_method=DEFAULT_ICA_METHOD, + n_robust_runs=DEFAULT_N_ROBUST_RUNS, + maxit=DEFAULT_N_MAX_ITER, + maxrestart=DEFAULT_N_MAX_RESTART, +): + """Perform ICA on `data` with the user selected ica method and returns mixing matrix. Parameters ---------- @@ -20,9 +38,13 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): Dimensionally reduced optimally combined functional data, where `S` is samples and `T` is time n_components : :obj:`int` - Number of components retained from PCA decomposition + Number of components retained from PCA decomposition. fixed_seed : :obj:`int` - Seed for ensuring reproducibility of ICA results + Seed for ensuring reproducibility of ICA results. + ica_method : :obj: `str` + selected ICA method, can be fastica or robustica. + n_robust_runs : :obj: `int` + selected number of robust runs when robustica is used. Default is 30. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. maxrestart : :obj:`int`, optional @@ -37,10 +59,6 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): where `C` is components and `T` is the same as in `data` fixed_seed : :obj:`int` Random seed from final decomposition. 
- - Notes - ----- - Uses `sklearn` implementation of FastICA for decomposition """ warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd") RepLGR.info( @@ -48,6 +66,168 @@ def tedica(data, n_components, fixed_seed, maxit=500, maxrestart=10): "decompose the dimensionally reduced dataset." ) + ica_method = ica_method.lower() + + if ica_method == "robustica": + mmix, fixed_seed = r_ica( + data, + n_components=n_components, + fixed_seed=fixed_seed, + n_robust_runs=n_robust_runs, + max_it=maxit, + ) + elif ica_method == "fastica": + mmix, fixed_seed = f_ica( + data, + n_components=n_components, + fixed_seed=fixed_seed, + maxit=maxit, + maxrestart=maxrestart, + ) + else: + raise ValueError("The selected ICA method is invalid!") + + return mmix, fixed_seed + + +def r_ica(data, n_components, fixed_seed, n_robust_runs, max_it): + """Perform robustica on `data` and returns mixing matrix. + + Parameters + ---------- + data : (S x T) :obj:`numpy.ndarray` + Dimensionally reduced optimally combined functional data, where `S` is + samples and `T` is time + n_components : :obj:`int` + Number of components retained from PCA decomposition. + fixed_seed : :obj:`int` + Seed for ensuring reproducibility of ICA results. + n_robust_runs : :obj:`int` + selected number of robust runs when robustica is used. Default is 30. + max_it : :obj:`int`, optional + Maximum number of iterations for ICA. Default is 500. + + Returns + ------- + mmix : (T x C) :obj:`numpy.ndarray` + Z-scored mixing matrix for converting input data to component space, + where `C` is components and `T` is the same as in `data` + fixed_seed : :obj:`int` + Random seed from final decomposition. + """ + if n_robust_runs > WARN_N_ROBUST_RUNS: + LGR.warning( + "The selected n_robust_runs is a very big number! The process will take a long time!"
+ ) + + RepLGR.info("RobustICA package was used for ICA decomposition \\citep{anglada2022robustica}.") + + if fixed_seed == -1: + fixed_seed = np.random.randint(low=1, high=1000) + + robust_method = "DBSCAN" + robust_ica_converged = False + while not robust_ica_converged: + try: + robust_ica = RobustICA( + n_components=n_components, + robust_runs=n_robust_runs, + whiten="arbitrary-variance", + max_iter=max_it, + random_state=fixed_seed, + robust_dimreduce=False, + fun="logcosh", + robust_method=robust_method, + ) + + s, mmix = robust_ica.fit_transform(data) + q = robust_ica.evaluate_clustering( + robust_ica.S_all, + robust_ica.clustering.labels_, + robust_ica.signs_, + robust_ica.orientation_, + ) + robust_ica_converged = True + + except Exception: + if robust_method == "DBSCAN": + # if RobustICA failed with DBSCAN, run again with AgglomerativeClustering + robust_method = "AgglomerativeClustering" + LGR.warning( + "DBSCAN clustering method did not converge. " + "Agglomerative clustering will be tried now." + ) + else: + raise ValueError("RobustICA failed to converge") + + LGR.info( + f"The {robust_method} clustering algorithm was used for clustering " + f"components across different runs" + ) + + iq = np.array( + np.mean(q[q["cluster_id"] >= 0].iq) + ) # Excluding outliers (cluster -1) from the index quality calculation + + if iq < WARN_IQ: + LGR.warning( + f"The resultant mean Index Quality is low ({iq}). It is recommended to rerun the " + "process with a different seed." + ) + + mmix = mmix[ + :, q["cluster_id"] >= 0 + ] # Excluding outliers (cluster -1) when calculating the mixing matrix + mmix = stats.zscore(mmix, axis=0) + + LGR.info( + f"RobustICA with {n_robust_runs} robust runs and seed {fixed_seed} was used. " + f"The mean Index Quality is {iq}."
+ ) + + no_outliers = np.count_nonzero(robust_ica.clustering.labels_ == -1) + if no_outliers: + LGR.info( + f"The {robust_method} clustering algorithm detected outliers when clustering " + f"components for different runs. These outliers are excluded when calculating " + f"the index quality and the mixing matrix to maximise the robustness of the " + f"decomposition." + ) + + return mmix, fixed_seed + + +def f_ica(data, n_components, fixed_seed, maxit, maxrestart): + """Perform FastICA on `data` and returns mixing matrix. + + Parameters + ---------- + data : :obj:`numpy.ndarray` + Dimensionally reduced optimally combined functional data, where `S` is + samples and `T` is time + n_components : :obj:`int` + Number of components retained from PCA decomposition + fixed_seed : :obj:`int` + Seed for ensuring reproducibility of ICA results + maxit : :obj:`int`, optional + Maximum number of iterations for ICA. Default is 500. + maxrestart : :obj:`int`, optional + Maximum number of attempted decompositions to perform with different + random seeds. ICA will stop running if there is convergence prior to + reaching this limit. Default is 10. + + Returns + ------- + mmix : (T x C) :obj:`numpy.ndarray` + Z-scored mixing matrix for converting input data to component space, + where `C` is components and `T` is the same as in `data` + fixed_seed : :obj:`int` + Random seed from final decomposition. 
+ + Notes + ----- + Uses `sklearn` implementation of FastICA for decomposition + """ if fixed_seed == -1: fixed_seed = np.random.randint(low=1, high=1000) diff --git a/tedana/gscontrol.py b/tedana/gscontrol.py index 244d409cb..a17b3a304 100644 --- a/tedana/gscontrol.py +++ b/tedana/gscontrol.py @@ -128,7 +128,6 @@ def gscontrol_raw( data_cat_nogs = data_cat.copy() # don't overwrite data_cat for echo in range(n_echos): data_echo_masked = data_cat_nogs[temporal_mean_mask, echo, :] - # Mean center echo's data over time echo_mean = data_echo_masked.mean(axis=-1, keepdims=True) data_echo_masked -= echo_mean diff --git a/tedana/resources/references.bib b/tedana/resources/references.bib index 68deb2c5d..97087170f 100644 --- a/tedana/resources/references.bib +++ b/tedana/resources/references.bib @@ -334,6 +334,16 @@ @article{tedana_decision_trees doi = {10.6084/m9.figshare.25251433.v2} } +@Article{anglada2022robustica, + Author = {Anglada-Girotto Miquel and Miravet-Verde Samuel and Serrano Luis and Head Sarah}, + Title = {robustica: customizable robust independent component analysis}, + Journal = {BMC Bioinformatics}, + Volume = {23}, + Number = {519}, + doi = {10.1186/s12859-022-05043-9}, + year = 2022 +} + @article{power2018ridding, title={Ridding fMRI data of motion-related influences: Removal of signals with distinct spatial and physical bases in multiecho data}, author={Power, Jonathan D and Plitt, Mark and Gotts, Stephen J and Kundu, Prantik and Voon, Valerie and Bandettini, Peter A and Martin, Alex}, diff --git a/tedana/tests/data/nih_five_echo_outputs_verbose.txt b/tedana/tests/data/nih_five_echo_outputs_verbose.txt index 5847b114b..2d54db98b 100644 --- a/tedana/tests/data/nih_five_echo_outputs_verbose.txt +++ b/tedana/tests/data/nih_five_echo_outputs_verbose.txt @@ -121,26 +121,6 @@ figures/sub-01_comp_028.png figures/sub-01_comp_029.png figures/sub-01_comp_030.png figures/sub-01_comp_031.png -figures/sub-01_comp_032.png -figures/sub-01_comp_033.png 
-figures/sub-01_comp_034.png -figures/sub-01_comp_035.png -figures/sub-01_comp_036.png -figures/sub-01_comp_037.png -figures/sub-01_comp_038.png -figures/sub-01_comp_039.png -figures/sub-01_comp_040.png -figures/sub-01_comp_041.png -figures/sub-01_comp_042.png -figures/sub-01_comp_043.png -figures/sub-01_comp_044.png -figures/sub-01_comp_045.png -figures/sub-01_comp_046.png -figures/sub-01_comp_047.png -figures/sub-01_comp_048.png -figures/sub-01_comp_049.png -figures/sub-01_comp_050.png -figures/sub-01_comp_051.png figures/sub-01_rmse_brain.svg figures/sub-01_rmse_timeseries.svg figures/sub-01_s0_brain.svg diff --git a/tedana/tests/test_integration.py b/tedana/tests/test_integration.py index 142efc175..eb00e80d8 100644 --- a/tedana/tests/test_integration.py +++ b/tedana/tests/test_integration.py @@ -129,6 +129,8 @@ def test_integration_five_echo(skip_integration): tedana_cli.tedana_workflow( data=datalist, tes=echo_times, + ica_method="robustica", + n_robust_runs=4, out_dir=out_dir, tedpca=0.95, fittype="curvefit", @@ -174,6 +176,7 @@ def test_integration_four_echo(skip_integration): data=datalist, mixm=op.join(op.dirname(datalist[0]), "desc-ICA_mixing_static.tsv"), tes=[11.8, 28.04, 44.28, 60.52], + ica_method="fastica", out_dir=out_dir, tedpca="kundu-stabilize", gscontrol=["gsr", "mir"], diff --git a/tedana/utils.py b/tedana/utils.py index 9c2c0c67b..5914862e5 100644 --- a/tedana/utils.py +++ b/tedana/utils.py @@ -18,12 +18,14 @@ from nilearn._utils import check_niimg from numpy import __version__ as numpy_version from pandas import __version__ as pandas_version +from robustica import __version__ as robustica_version from scipy import __version__ as scipy_version from scipy import ndimage from scipy.special import lpmv from sklearn import __version__ as sklearn_version from sklearn.utils import check_array from threadpoolctl import __version__ as threadpoolctl_version +from tqdm import __version__ as tqdm_version LGR = logging.getLogger("GENERAL") RepLGR = 
logging.getLogger("REPORT") @@ -615,9 +617,11 @@ def get_system_version_info(): "nilearn": nilearn_version, "numpy": numpy_version, "pandas": pandas_version, + "robustica": robustica_version, "scikit-learn": sklearn_version, "scipy": scipy_version, "threadpoolctl": threadpoolctl_version, + "tqdm": tqdm_version, } return { diff --git a/tedana/workflows/parser_utils.py b/tedana/workflows/parser_utils.py index b0db74bdc..db0f13b37 100644 --- a/tedana/workflows/parser_utils.py +++ b/tedana/workflows/parser_utils.py @@ -3,6 +3,8 @@ import argparse import os.path as op +from tedana.config import MAX_N_ROBUST_RUNS, MIN_N_ROBUST_RUNS + def check_tedpca_value(string, is_parser=True): """ @@ -33,6 +35,31 @@ def check_tedpca_value(string, is_parser=True): return intarg +def check_n_robust_runs_value(string, is_parser=True): + """ + Check n_robust_runs argument. + + Check if argument is an int between MIN_N_ROBUST_RUNS and MAX_N_ROBUST_RUNS. + """ + error = argparse.ArgumentTypeError if is_parser else ValueError + try: + intarg = int(string) + except ValueError: + msg = ( + f"Argument to n_robust_runs must be an integer " + f"between {MIN_N_ROBUST_RUNS} and {MAX_N_ROBUST_RUNS}." + ) + raise error(msg) + + if not (MIN_N_ROBUST_RUNS <= intarg <= MAX_N_ROBUST_RUNS): + raise error( + f"n_robust_runs must be an integer between {MIN_N_ROBUST_RUNS} " + f"and {MAX_N_ROBUST_RUNS}." 
+ ) + else: + return intarg + + def is_valid_file(parser, arg): """Check if argument is existing file.""" if not op.isfile(arg) and arg is not None: diff --git a/tedana/workflows/tedana.py b/tedana/workflows/tedana.py index ec9e589d6..e2b3858bf 100644 --- a/tedana/workflows/tedana.py +++ b/tedana/workflows/tedana.py @@ -29,9 +29,20 @@ utils, ) from tedana.bibtex import get_description_references +from tedana.config import ( + DEFAULT_ICA_METHOD, + DEFAULT_N_MAX_ITER, + DEFAULT_N_MAX_RESTART, + DEFAULT_N_ROBUST_RUNS, + DEFAULT_SEED, +) from tedana.selection.component_selector import ComponentSelector from tedana.stats import computefeats2 -from tedana.workflows.parser_utils import check_tedpca_value, is_valid_file +from tedana.workflows.parser_utils import ( + check_n_robust_runs_value, + check_tedpca_value, + is_valid_file, +) LGR = logging.getLogger("GENERAL") RepLGR = logging.getLogger("REPORT") @@ -156,7 +167,7 @@ def _get_parser(): "were distributed with MEICA. " "Users may also provide a float from 0 to 1, " "in which case components will be selected based on the " - "cumulative variance explained or an integer greater than 1" + "cumulative variance explained or an integer greater than 1 " "in which case the specificed number of components will be " "selected." ), @@ -169,7 +180,7 @@ def _get_parser(): "Decision tree to use. You may use a " "packaged tree (tedana_orig, meica, minimal) or supply a JSON " "file which matches the decision tree file " - "specification. Minimal still being tested with more" + "specification. Minimal still being tested with more " "details in docs" ), default="tedana_orig", @@ -180,12 +191,26 @@ def _get_parser(): type=lambda x: is_valid_file(parser, x), help=( "File containing external regressors to compare to ICA component be used in the " - "decision tree. For example, to identify components fit head motion time series." + "decision tree. For example, to identify components fit head motion time series. 
" "The file must be a TSV file with the same number of rows as the number of volumes in " "the input data. Column labels and statistical tests are defined with external_labels." ), default=None, ) + optional.add_argument( + "--ica_method", + dest="ica_method", + help=( + "The applied ICA method. " + "fastica runs FastICA from sklearn once with the seed value. " + "robustica will run FastICA n_robust_runs times and uses " + "clustering methods to overcome the randomness of the FastICA algorithm. " + "robustica will be slower." + ), + choices=["robustica", "fastica"], + type=str.lower, + default=DEFAULT_ICA_METHOD, + ) optional.add_argument( "--seed", dest="fixed_seed", @@ -195,9 +220,22 @@ def _get_parser(): "Value used for random initialization of ICA " "algorithm. Set to an integer value for " "reproducible ICA results. Set to -1 for " - "varying results across ICA calls. " + "varying results across ICA calls. This " + "applies to both fastica and robustica methods." + ), + default=DEFAULT_SEED, + ) + optional.add_argument( + "--n_robust_runs", + dest="n_robust_runs", + metavar="[5-500]", + type=check_n_robust_runs_value, + help=( + "The number of times robustica will run. " + "This is only effective when ica_method is " + "set to robustica." ), - default=42, + default=DEFAULT_N_ROBUST_RUNS, ) optional.add_argument( "--maxit", @@ -205,7 +243,7 @@ def _get_parser(): metavar="INT", type=int, help=("Maximum number of iterations for ICA."), - default=500, + default=DEFAULT_N_MAX_ITER, ) optional.add_argument( "--maxrestart", @@ -219,7 +257,7 @@ def _get_parser(): "convergence is achieved before maxrestart " "attempts, ICA will finish early." ), - default=10, + default=DEFAULT_N_MAX_RESTART, ) optional.add_argument( "--tedort", @@ -238,7 +276,7 @@ def _get_parser(): "Perform additional denoising to remove " "spatially diffuse noise. " "This argument can be single value or a space " - "delimited list" + "delimited list." 
), choices=["mir", "gsr"], default="", @@ -347,10 +385,12 @@ def tedana_workflow( combmode="t2s", tree="tedana_orig", external_regressors=None, + ica_method=DEFAULT_ICA_METHOD, + n_robust_runs=DEFAULT_N_ROBUST_RUNS, tedpca="aic", - fixed_seed=42, - maxit=500, - maxrestart=10, + fixed_seed=DEFAULT_SEED, + maxit=DEFAULT_N_MAX_ITER, + maxrestart=DEFAULT_N_MAX_RESTART, tedort=False, gscontrol=None, no_reports=False, @@ -417,6 +457,16 @@ def tedana_workflow( The file must be a TSV file with the same number of rows as the number of volumes in the input data. Each column in the file will be treated as a separate regressor. Default is None. + ica_method : {'fastica', 'robustica'}, optional + The applied ICA method. fastica runs FastICA from sklearn + once with the seed value. 'robustica' will run + 'FastICA' n_robust_runs times and uses clustering methods to overcome + the randomness of the FastICA algorithm. + robustica will be slower. + Default is 'fastica' + n_robust_runs : :obj:`int`, optional + The number of times robustica will run. This is only effective when 'ica_method' is + set to 'robustica'. tedpca : {'mdl', 'aic', 'kic', 'kundu', 'kundu-stabilize', float, int}, optional Method with which to select components in TEDPCA. If a float is provided, then it is assumed to represent percentage of variance @@ -426,8 +476,8 @@ def tedana_workflow( Default is 'aic'. fixed_seed : :obj:`int`, optional Value passed to ``mdp.numx_rand.seed()``. - Set to a positive integer value for reproducible ICA results; - otherwise, set to -1 for varying results across calls. + Set to a positive integer value for reproducible ICA results (fastica/robustica); + otherwise, set to -1 for varying results across ICA (fastica/robustica) calls. maxit : :obj:`int`, optional Maximum number of iterations for ICA. Default is 500. 
maxrestart : :obj:`int`, optional @@ -731,7 +781,13 @@ def tedana_workflow( seed = fixed_seed while keep_restarting: mmix, seed = decomposition.tedica( - dd, n_components, seed, maxit, maxrestart=(maxrestart - n_restarts) + dd, + n_components, + seed, + ica_method, + n_robust_runs, + maxit, + maxrestart=(maxrestart - n_restarts), ) seed += 1 n_restarts = seed - fixed_seed