Skip to content

Commit

Permalink
Release 1.18
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Jul 25, 2023
2 parents 116a87c + 6b699b5 commit 6c2c1e9
Show file tree
Hide file tree
Showing 157 changed files with 5,922 additions and 2,754 deletions.
4 changes: 3 additions & 1 deletion INSTALL
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,10 @@ Alpine Linux
Note: To install gsl-dev, it may be necessary to enable the "community"
repository in /etc/apk/repositories.

Note: some older Alpine versions use libressl-dev rather than openssl-dev.

doas apk update # Ensure the package list is up to date
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev libressl-dev gsl-dev perl-dev
doas apk add autoconf automake make gcc musl-dev perl bash zlib-dev bzip2-dev xz-dev curl-dev openssl-dev gsl-dev perl-dev

OpenSUSE
--------
Expand Down
3 changes: 2 additions & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -723,11 +723,12 @@ Public License instead of this License. But first, please read

-----------------------------------------------------------------------------

LICENSE FOR VariantKey (https://github.com/Genomicsplc/variantkey)
LICENSE FOR VariantKey (https://github.com/tecnickcom/variantkey)

The MIT License

Copyright (c) 2017-2018 GENOMICS plc
Copyright (c) 2018-2023 Nicola Asuni - Tecnick.com

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ OBJS = main.o vcfindex.o tabix.o \
regidx.o smpl_ilist.o csq.o vcfbuf.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
vcfsort.o cols.o extsort.o dist.o abuf.o \
ccall.o em.o prob1.o kmin.o str_finder.o
ccall.o em.o prob1.o kmin.o str_finder.o gff.o
PLUGIN_OBJS = vcfplugin.o

prefix = /usr/local
Expand Down Expand Up @@ -104,7 +104,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.17
PACKAGE_VERSION = 1.18

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -246,7 +246,7 @@ vcfgtcheck.o: vcfgtcheck.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htsli
vcfindex.o: vcfindex.c $(htslib_vcf_h) $(htslib_tbx_h) $(htslib_kstring_h) $(htslib_bgzf_h) $(bcftools_h)
vcfisec.o: vcfisec.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcftools_h) $(filter_h)
vcfmerge.o: vcfmerge.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) regidx.h $(bcftools_h) vcmp.h $(htslib_khash_h)
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h
vcfnorm.o: vcfnorm.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) rbuf.h abuf.h gff.h
vcfquery.o: vcfquery.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_str2int_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(convert_h) $(smpl_ilist_h)
vcfroh.o: vcfroh.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kseq_h) $(htslib_bgzf_h) $(bcftools_h) HMM.h $(smpl_ilist_h) $(filter_h)
vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(htslib_khash_str2int_h) $(bcftools_h) HMM.h rbuf.h
Expand Down Expand Up @@ -289,6 +289,7 @@ vcfbuf.o: vcfbuf.c $(htslib_vcf_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(bcf
abuf.o: abuf.c $(htslib_vcf_h) $(bcftools_h) rbuf.h abuf.h
extsort.o: extsort.c $(bcftools_h) extsort.h kheap.h
smpl_ilist.o: smpl_ilist.c $(bcftools_h) $(smpl_ilist_h)
gff.o: gff.c gff.h regidx.h
csq.o: csq.c $(htslib_hts_h) $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_khash_h) $(htslib_khash_str2int_h) $(htslib_kseq_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) regidx.h kheap.h $(smpl_ilist_h) rbuf.h

# test programs
Expand Down
118 changes: 118 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,121 @@
## Release 1.18 (25th July 2023)


Changes affecting the whole of bcftools, or multiple commands:

* Support auto indexing during writing BCF and VCF.gz via new `--write-index` option


Changes affecting specific commands:

* bcftools annotate

- The `-m, --mark-sites` option can now be used to mark all sites without the
need to provide the `-a` file (#1861)

- Fix a bug where the `-m` function did not respect the `--min-overlap` option (#1869)

- Fix a bug where an update of INFO/END resulted in an assertion error (#1957)

* bcftools concat

- New option `--drop-genotypes`

* bcftools consensus

- Support higher-ploidy genotypes with `-H, --haplotype` (#1892)

- Allow `--mark-ins` and `--mark-snv` with a character, similarly to `--mark-del`

* bcftools convert

- Support for conversion from tab-delimited files (CHROM,POS,REF,ALT) to sites-only VCFs

* bcftools csq

- New `--unify-chr-names` option to automatically unify different chromosome
naming conventions in the input GFF, fasta and VCF files (e.g. "chrX" vs "X")

- More versatility in parsing various flavors of GFF

- A new `--dump-gff` option to help with debugging and investigating the internals
of GFF parsing

- When printing consequences in nonsense mediated decay transcripts, include 'NMD_transcript'
in the consequence part of the annotation. This is to make filtering easier and analogous to
VEP annotations. For example the consequence annotation
3_prime_utr|PCGF3|ENST00000430644|NMD
is newly printed as
3_prime_utr&NMD_transcript|PCGF3|ENST00000430644|NMD

* bcftools gtcheck

- Add stats for the number of sites matched in the GT-vs-GT, GT-vs-PL, etc. modes. This
information is important for interpretation of the discordance score, as only the
GT-vs-GT matching can be interpreted as the number of mismatching genotypes.

* bcftools +mendelian2

- Fix in command line argument parsing, the `-p` and `-P` options were not
functioning (#1906)

* bcftools merge

- New `-M, --missing-rules` option to control the behavior of merging of vector tags
to prevent mixtures of known and missing values in tags when desired

- Use values pertaining to the unknown allele (<*> or <NON_REF>) when available
to prevent mixtures of known and missing values (#1888)

- Revamped line matching code to fix problems in gVCF merging where split gVCF blocks
would not update genotypes (#1891, #1164).

* bcftools mpileup

- Fix a bug in --indels-v2.0 which caused an endless loop when CIGAR operator 'H' or 'P'
was encountered

* bcftools norm

- The `-m, --multiallelics +` mode now preserves phasing (#1893)

- Symbolic <DEL.*> alleles are now normalized too (#1919)

- New `-g, --gff-annot` option to right-align indels in forward transcripts to follow
HGVS 3' rule (#1929)

* bcftools query

- Force newline character in formatting expression when not given explicitly

- Fix `-H` header output in formatting expressions containing newlines

* bcftools reheader

- Make `-f, --fai` aware of long contigs not representable by 32-bit integer (#1959)

* bcftools +split-vep

- Prevent a segfault when `-i/-e` use a VEP subfield not included in `-f` or `-c` (#1877)

- New `-X, --keep-sites` option complementing the existing `-x, --drop-sites` option

- Force newline character in formatting expression when not given explicitly

- Fix a subtle ambiguity: identical rows must be returned when `-s` is applied regardless
of `-f` containing the `-a` VEP tag itself or not.

* bcftools stats

- Collect new VAF (variant allele frequency) statistics from FORMAT/AD field

- When counting transitions/transversions, consider also alternate het genotypes

* plot-vcfstats

- Add three new VAF plots


## Release 1.17 (21st February 2023)


Expand Down
5 changes: 4 additions & 1 deletion bcftools.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* bcftools.h -- utility function declarations.
Copyright (C) 2013-2022 Genome Research Ltd.
Copyright (C) 2013-2023 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -49,6 +49,9 @@ void error(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2
// newline will be added by the function.
void error_errno(const char *format, ...) HTS_NORETURN HTS_FORMAT(HTS_PRINTF_FMT, 1, 2);

// For on the fly index creation with --write-index
int init_index(htsFile *fh, bcf_hdr_t *hdr, char *fname, char **idx_fname);

void bcf_hdr_append_version(bcf_hdr_t *hdr, int argc, char **argv, const char *cmd);
const char *hts_bcf_wmode(int file_type);
const char *hts_bcf_wmode2(int file_type, const char *fname);
Expand Down
12 changes: 12 additions & 0 deletions cigar_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,12 @@ static inline int cstate_seek_fwd(cigar_state_t *cs, hts_pos_t *pos_ptr, int tri
cs->icig++;
continue;
}
if ( op==BAM_CHARD_CLIP || op==BAM_CPAD )
{
cs->icig++;
continue;
}
error("FIXME: not ready for CIGAR operator %d\n",op);
}
// the read starts after pos
if ( trim_left )
Expand Down Expand Up @@ -175,6 +181,12 @@ static inline int cstate_seek_op_fwd(cigar_state_t *cs, hts_pos_t pos, int seek_
cs->icig++;
continue;
}
if ( op==BAM_CHARD_CLIP || op==BAM_CPAD )
{
cs->icig++;
continue;
}
error("FIXME: not ready for CIGAR operator %d\n",op);
}
return cs->icig < cs->ncig ? -1 : -2;
}
Expand Down
59 changes: 40 additions & 19 deletions consensus.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@
#define PICK_SHORT 8
#define PICK_IUPAC 16

#define TO_UPPER 0
#define TO_LOWER 1
#define TO_UPPER 1
#define TO_LOWER 2

typedef struct
{
Expand Down Expand Up @@ -324,7 +324,7 @@ static void init_region(args_t *args, char *line)
{
char *ss, *se = line;
while ( *se && !isspace(*se) && *se!=':' ) se++;
int from = 0, to = 0;
hts_pos_t from = 0, to = 0;
char tmp = 0, *tmp_ptr = NULL;
if ( *se )
{
Expand Down Expand Up @@ -356,7 +356,14 @@ static void init_region(args_t *args, char *line)
args->fa_frz_mod = -1;
args->fa_case = -1;
args->vcf_rbuf.n = 0;
bcf_sr_seek(args->files,line,args->fa_ori_pos);

kstring_t str = {0,0,0};
if ( from==0 ) from = 1;
if ( to==0 ) to = HTS_POS_MAX;
ksprintf(&str,"%s:%"PRIhts_pos"-%"PRIhts_pos,line,from,to);
bcf_sr_set_regions(args->files,line,0);
free(str.s);

if ( tmp_ptr ) *tmp_ptr = tmp;
fprintf(args->fp_out,">%s%s\n",args->chr_prefix?args->chr_prefix:"",line);
if ( args->chain_fname )
Expand Down Expand Up @@ -466,25 +473,37 @@ static char *mark_del(char *ref, int rlen, char *alt, int mark)
static void mark_ins(char *ref, char *alt, char mark)
{
int i, nref = strlen(ref), nalt = strlen(alt);
if ( mark=='l' )
if ( mark==TO_LOWER )
for (i=nref; i<nalt; i++) alt[i] = tolower(alt[i]);
else
else if ( mark==TO_UPPER )
for (i=nref; i<nalt; i++) alt[i] = toupper(alt[i]);
else if ( mark )
for (i=nref; i<nalt; i++) alt[i] = mark;
}
static void mark_snv(char *ref, char *alt, char mark)
{
int i, nref = strlen(ref), nalt = strlen(alt);
int n = nref < nalt ? nref : nalt;
if ( mark=='l' )
if ( mark==TO_LOWER )
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = tolower(alt[i]);
}
else
else if ( mark==TO_UPPER)
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
}
else if ( mark==TO_UPPER)
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
}
else if ( mark )
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = mark;
}
}
static void iupac_init(args_t *args, bcf1_t *rec)
{
Expand Down Expand Up @@ -1099,19 +1118,18 @@ static void usage(args_t *args)
fprintf(stderr, " -f, --fasta-ref FILE Reference sequence in fasta format\n");
fprintf(stderr, " -H, --haplotype WHICH Choose which allele to use from the FORMAT/GT field, note\n");
fprintf(stderr, " the codes are case-insensitive:\n");
fprintf(stderr, " 1: first allele from GT, regardless of phasing\n");
fprintf(stderr, " 2: second allele from GT, regardless of phasing\n");
fprintf(stderr, " N: N={1,2,3,..} is the index of the allele from GT, regardless of phasing (e.g. \"2\")\n");
fprintf(stderr, " R: REF allele in het genotypes\n");
fprintf(stderr, " A: ALT allele\n");
fprintf(stderr, " I: IUPAC code for all genotypes\n");
fprintf(stderr, " LR,LA: longer allele and REF/ALT if equal length\n");
fprintf(stderr, " SR,SA: shorter allele and REF/ALT if equal length\n");
fprintf(stderr, " 1pIu,2pIu: first/second allele for phased and IUPAC code for unphased GTs\n");
fprintf(stderr, " NpIu: index of the allele for phased and IUPAC code for unphased GTs (e.g. \"2pIu\")\n");
fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -I, --iupac-codes Output IUPAC codes based on FORMAT/GT, use -s/-S to subset samples\n");
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert CHAR for deletions\n");
fprintf(stderr, " --mark-ins uc|lc Highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
fprintf(stderr, " --mark-snv uc|lc Highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert character CHAR for deletions\n");
fprintf(stderr, " --mark-ins uc|lc|CHAR Highlight insertions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
fprintf(stderr, " --mark-snv uc|lc|CHAR Highlight substitutions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
fprintf(stderr, " -m, --mask FILE Replace regions according to the next --mask-with option. The default is --mask-with N\n");
fprintf(stderr, " --mask-with CHAR|uc|lc Replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n");
fprintf(stderr, " -M, --missing CHAR Output CHAR instead of skipping a missing genotype \"./.\"\n");
Expand Down Expand Up @@ -1163,13 +1181,15 @@ int main_consensus(int argc, char *argv[])
{
case 1 : args->mark_del = optarg[0]; break;
case 2 :
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u';
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l';
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = TO_UPPER;
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = TO_LOWER;
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_ins = optarg[0];
else error("The argument is not recognised: --mark-ins %s\n",optarg);
break;
case 3 :
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u';
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l';
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = TO_UPPER;
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = TO_LOWER;
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_snv = optarg[0];
else error("The argument is not recognised: --mark-snv %s\n",optarg);
break;
case 'p': args->chr_prefix = optarg; break;
Expand Down Expand Up @@ -1211,7 +1231,8 @@ int main_consensus(int argc, char *argv[])
{
char *tmp;
args->haplotype = strtol(optarg, &tmp, 10);
if ( tmp==optarg || *tmp ) error("Error: Could not parse --haplotype %s, expected numeric argument\n", optarg);
if ( tmp==optarg || (*tmp && strcasecmp(tmp,"pIu")) ) error("Error: Could not parse \"--haplotype %s\", expected number of number followed with \"pIu\"\n", optarg);
if ( *tmp ) args->allele |= PICK_IUPAC;
if ( args->haplotype <=0 ) error("Error: Expected positive integer with --haplotype\n");
}
break;
Expand Down
Loading

0 comments on commit 6c2c1e9

Please sign in to comment.