Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid name space conflict with bwa #10

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ sketch of the example:
int main(int argc, char *argv[])
{
int i, n_seqs, n_utgs;
bseq1_t *seqs; // array of input sequences
fml_seq1_t *seqs; // array of input sequences
fml_utg_t *utgs; // array of output unitigs
fml_opt_t opt;
if (argc == 1) return 1; // do nothing if there is no input file
seqs = bseq_read(argv[1], &n_seqs); // or fill the array with callers' functions
seqs = fml_seq_read(argv[1], &n_seqs); // or fill the array with callers' functions
fml_opt_init(&opt); // initialize parameters
utgs = fml_assemble(&opt, n_seqs, seqs, &n_utgs); // assemble!
for (i = 0; i < n_utgs; ++i) // output in fasta
Expand Down
148 changes: 20 additions & 128 deletions bfc.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,12 @@
#include "kmer.h"
#include "internal.h"
#include "fml.h"
#include "bfc.h"

/*******************
*** BFC options ***
*******************/

typedef struct {
int n_threads, q, k, l_pre;
int min_cov; // a k-mer is considered solid if the count is no less than this

int max_end_ext;
int win_multi_ec;
float min_trim_frac;

// these ec options cannot be changed on the command line
int w_ec, w_ec_high, w_absent, w_absent_high;
int max_path_diff, max_heap;
} bfc_opt_t;

void bfc_opt_init(bfc_opt_t *opt)
{
memset(opt, 0, sizeof(bfc_opt_t));
Expand All @@ -46,26 +34,6 @@ void bfc_opt_init(bfc_opt_t *opt)
opt->max_heap = 100;
}

/**********************
*** K-mer counting ***
**********************/

#define CNT_BUF_SIZE 256

typedef struct { // cache to reduce locking
uint64_t y[2];
int is_high;
} insbuf_t;

typedef struct {
int k, q;
int n_seqs;
const bseq1_t *seqs;
bfc_ch_t *ch;
int *n_buf;
insbuf_t **buf;
} cnt_step_t;

bfc_kmer_t bfc_kmer_null = {{0,0,0,0}};

static int bfc_kmer_bufclear(cnt_step_t *cs, int forced, int tid)
Expand Down Expand Up @@ -97,7 +65,7 @@ static void bfc_kmer_insert(cnt_step_t *cs, const bfc_kmer_t *x, int is_high, in
static void worker_count(void *_data, long k, int tid)
{
cnt_step_t *cs = (cnt_step_t*)_data;
const bseq1_t *s = &cs->seqs[k];
const fml_seq1_t *s = &cs->seqs[k];
int i, l;
bfc_kmer_t x = bfc_kmer_null;
uint64_t qmer = 0, mask = (1ULL<<cs->k) - 1;
Expand All @@ -111,7 +79,7 @@ static void worker_count(void *_data, long k, int tid)
}
}

struct bfc_ch_s *fml_count(int n, const bseq1_t *seq, int k, int q, int l_pre, int n_threads)
struct bfc_ch_s *fml_count(int n, const fml_seq1_t *seq, int k, int q, int l_pre, int n_threads)
{
int i;
cnt_step_t cs;
Expand All @@ -127,34 +95,6 @@ struct bfc_ch_s *fml_count(int n, const bseq1_t *seq, int k, int q, int l_pre, i
return cs.ch;
}

/***************
*** Correct ***
***************/

#define BFC_MAX_KMER 63
#define BFC_MAX_BF_SHIFT 37

#define BFC_MAX_PATHS 4
#define BFC_EC_HIST 5
#define BFC_EC_HIST_HIGH 2

#define BFC_EC_MIN_COV_COEF .1

/**************************
* Sequence struct for ec *
**************************/

#include "kvec.h"

typedef struct { // NOTE: unaligned memory
uint8_t b:3, q:1, ob:3, oq:1;
uint8_t dummy;
uint16_t lcov:6, hcov:6, solid_end:1, high_end:1, ec:1, absent:1;
int i;
} ecbase_t;

typedef kvec_t(ecbase_t) ecseq_t;

static int bfc_seq_conv(const char *s, const char *q, int qthres, ecseq_t *seq)
{
int i, l;
Expand Down Expand Up @@ -264,53 +204,6 @@ uint64_t bfc_ec_best_island(int k, const ecseq_t *s)
return max > 0? (uint64_t)(max_i - max - k + 1) << 32 | max_i : 0;
}

/********************
* Correct one read *
********************/

#include "ksort.h"

#define ECCODE_MISC 1
#define ECCODE_MANY_N 2
#define ECCODE_NO_SOLID 3
#define ECCODE_UNCORR_N 4
#define ECCODE_MANY_FAIL 5

typedef struct {
uint32_t ec_code:3, brute:1, n_ec:14, n_ec_high:14;
uint32_t n_absent:24, max_heap:8;
} ecstat_t;

typedef struct {
uint8_t ec:1, ec_high:1, absent:1, absent_high:1, b:4;
} bfc_penalty_t;

typedef struct {
int tot_pen;
int i; // base position
int k; // position in the stack
int32_t ecpos_high[BFC_EC_HIST_HIGH];
int32_t ecpos[BFC_EC_HIST];
bfc_kmer_t x;
} echeap1_t;

typedef struct {
int parent, i, tot_pen;
uint8_t b;
bfc_penalty_t pen;
uint16_t cnt;
} ecstack1_t;

typedef struct {
const bfc_opt_t *opt;
const bfc_ch_t *ch;
kvec_t(echeap1_t) heap;
kvec_t(ecstack1_t) stack;
ecseq_t seq, tmp, ec[2];
int mode;
ecstat_t ori_st;
} bfc_ec1buf_t;

#define heap_lt(a, b) ((a).tot_pen > (b).tot_pen)
KSORT_INIT(ec, echeap1_t, heap_lt)

Expand Down Expand Up @@ -567,20 +460,7 @@ ecstat_t bfc_ec1(bfc_ec1buf_t *e, char *seq, char *qual)
return s;
}

/********************
* Error correction *
********************/

typedef struct {
const bfc_opt_t *opt;
const bfc_ch_t *ch;
bfc_ec1buf_t **e;
int64_t n_processed;
int n_seqs, flt_uniq;
bseq1_t *seqs;
} ec_step_t;

static uint64_t max_streak(int k, const bfc_ch_t *ch, const bseq1_t *s)
static uint64_t max_streak(int k, const bfc_ch_t *ch, const fml_seq1_t *s)
{
int i, l;
uint64_t max = 0, t = 0;
Expand All @@ -602,7 +482,7 @@ static uint64_t max_streak(int k, const bfc_ch_t *ch, const bseq1_t *s)
static void worker_ec(void *_data, long k, int tid)
{
ec_step_t *es = (ec_step_t*)_data;
bseq1_t *s = &es->seqs[k];
fml_seq1_t *s = &es->seqs[k];
if (es->flt_uniq) {
uint64_t max;
max = max_streak(es->opt->k, es->ch, s);
Expand All @@ -624,7 +504,7 @@ static void worker_ec(void *_data, long k, int tid)
} else bfc_ec1(es->e[tid], s->seq, s->qual);
}

float fml_correct_core(const fml_opt_t *opt, int flt_uniq, int n, bseq1_t *seq)
float fml_correct_core(const fml_opt_t *opt, int flt_uniq, int n, fml_seq1_t *seq)
{
bfc_ch_t *ch;
int i, mode;
Expand Down Expand Up @@ -663,12 +543,24 @@ float fml_correct_core(const fml_opt_t *opt, int flt_uniq, int n, bseq1_t *seq)
return kcov;
}

float fml_correct(const fml_opt_t *opt, int n, bseq1_t *seq)
// Added by jwala for use in libSeqLib
void kmer_correct(ec_step_t * es, int mode, bfc_ch_t * ch) {
int i = 0;
es->e = (bfc_ec1buf_t**)calloc(es->opt->n_threads, sizeof(void*)); //jwala added cast
for (i = 0; i < es->opt->n_threads; ++i)
es->e[i] = ec1buf_init(es->opt, ch), es->e[i]->mode = mode;
kt_for(es->opt->n_threads, worker_ec, es, es->n_seqs);
for (i = 0; i < es->opt->n_threads; ++i)
ec1buf_destroy(es->e[i]);
free(es->e);
}

float fml_correct(const fml_opt_t *opt, int n, fml_seq1_t *seq)
{
return fml_correct_core(opt, 0, n, seq);
}

float fml_fltuniq(const fml_opt_t *opt, int n, bseq1_t *seq)
float fml_fltuniq(const fml_opt_t *opt, int n, fml_seq1_t *seq)
{
return fml_correct_core(opt, 1, n, seq);
}
153 changes: 153 additions & 0 deletions bfc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#ifndef AC_BFC_H__
#define AC_BFC_H__

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include "htab.h"
/* #include "kmer.h" ... this is actually included by htab.h */
#include "internal.h"
#include "fml.h"
#include "khash.h"

/* Andreas Tille <[email protected]>: Its not clear where jwala took this from and what its purpose might be - commenting out for the moment
#define _cnt_eq(a, b) ((a)>>14 == (b)>>14)
#define _cnt_hash(a) ((a)>>14)
KHASH_INIT(cnt, uint64_t, char, 0, _cnt_hash, _cnt_eq)
typedef khash_t(cnt) cnthash_t;

struct bfc_ch_s {
int k;
cnthash_t **h;
// private
int l_pre;
};
*/

typedef struct {
int n_threads, q, k, l_pre;
int min_cov; // a k-mer is considered solid if the count is no less than this

int max_end_ext;
int win_multi_ec;
float min_trim_frac;

// these ec options cannot be changed on the command line
int w_ec, w_ec_high, w_absent, w_absent_high;
int max_path_diff, max_heap;
} bfc_opt_t;

/**********************
*** K-mer counting ***
**********************/

#define CNT_BUF_SIZE 256

typedef struct { // cache to reduce locking
uint64_t y[2];
int is_high;
} insbuf_t;

typedef struct {
int k, q;
int n_seqs;
const fml_seq1_t *seqs;
bfc_ch_t *ch;
int *n_buf;
insbuf_t **buf;
} cnt_step_t;

/***************
*** Correct ***
***************/

#define BFC_MAX_KMER 63
#define BFC_MAX_BF_SHIFT 37

#define BFC_MAX_PATHS 4
#define BFC_EC_HIST 5
#define BFC_EC_HIST_HIGH 2

#define BFC_EC_MIN_COV_COEF .1

/**************************
* Sequence struct for ec *
**************************/

#include "kvec.h"

typedef struct { // NOTE: unaligned memory
uint8_t b:3, q:1, ob:3, oq:1;
uint8_t dummy;
uint16_t lcov:6, hcov:6, solid_end:1, high_end:1, ec:1, absent:1;
int i;
} ecbase_t;

typedef kvec_t(ecbase_t) ecseq_t;

/********************
* Correct one read *
********************/

#include "ksort.h"

#define ECCODE_MISC 1
#define ECCODE_MANY_N 2
#define ECCODE_NO_SOLID 3
#define ECCODE_UNCORR_N 4
#define ECCODE_MANY_FAIL 5

typedef struct {
uint32_t ec_code:3, brute:1, n_ec:14, n_ec_high:14;
uint32_t n_absent:24, max_heap:8;
} ecstat_t;

typedef struct {
uint8_t ec:1, ec_high:1, absent:1, absent_high:1, b:4;
} bfc_penalty_t;

typedef struct {
int tot_pen;
int i; // base position
int k; // position in the stack
int32_t ecpos_high[BFC_EC_HIST_HIGH];
int32_t ecpos[BFC_EC_HIST];
bfc_kmer_t x;
} echeap1_t;

typedef struct {
int parent, i, tot_pen;
uint8_t b;
bfc_penalty_t pen;
uint16_t cnt;
} ecstack1_t;

typedef struct {
const bfc_opt_t *opt;
const bfc_ch_t *ch;
kvec_t(echeap1_t) heap;
kvec_t(ecstack1_t) stack;
ecseq_t seq, tmp, ec[2];
int mode;
ecstat_t ori_st;
} bfc_ec1buf_t;

/********************
* Error correction *
********************/

typedef struct {
const bfc_opt_t *opt;
const bfc_ch_t *ch;
bfc_ec1buf_t **e;
int64_t n_processed;
int n_seqs, flt_uniq;
fml_seq1_t *seqs;
} ec_step_t;

void kmer_correct(ec_step_t * es, int mode, bfc_ch_t * ch);
void bfc_opt_init(bfc_opt_t *opt);

#endif
Loading