Skip to content

Commit

Permalink
Updates (#22)
Browse files Browse the repository at this point in the history
  • Loading branch information
mtomko authored Aug 29, 2023
1 parent ea7587d commit 74df425
Show file tree
Hide file tree
Showing 37 changed files with 406 additions and 383 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

## 3.6.4
* No user-facing changes

## 3.6.3
* Documentation for processing demultiplexed FASTQ files

Expand Down
19 changes: 9 additions & 10 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,33 @@ val artifactId = "poolq"

inThisBuild(
List(
scalaVersion := "2.13.10",
scalaVersion := "2.13.11",
semanticdbEnabled := true,
semanticdbVersion := scalafixSemanticdb.revision,
scalafixDependencies += "com.github.liancheng" %% "organize-imports" % "0.6.0",
versionScheme := Some("early-semver")
)
)

lazy val versions = new {
val acyclic = "0.2.1"
val betterFiles = "3.9.1"
val betterFiles = "3.9.2"
val betterMonadicFor = "0.3.1"
val catsEffect3 = "3.4.5"
val cats = "2.9.0"
val commonsIo = "2.11.0"
val catsEffect3 = "3.5.1"
val cats = "2.10.0"
val commonsIo = "2.13.0"
val commonsText = "1.10.0"
val commonsMath3 = "3.6.1"
val fastutil = "8.5.11"
val fs2 = "3.5.0"
val fastutil = "8.5.12"
val fs2 = "3.8.0"
val kantanCodecs = "0.5.3"
val kantanCsv = "0.7.0"
val log4s = "1.10.0"
val logback = "1.2.11"
val munit = "0.7.29"
val munitCatsEffect3 = "1.0.7"
val samTools = "3.0.4"
val samTools = "3.0.5"
val scalaCheck = "1.17.0"
val scalaTest = "3.2.15"
val scalaTest = "3.2.16"
val scalaTestPlusScalaCheck = "3.2.2.0"
val scopt = "4.1.0"
val slf4j = "1.7.36"
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.8.2
sbt.version=1.9.4
12 changes: 6 additions & 6 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.3")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.10.4")
addSbtPlugin("ch.epfl.scala" % "sbt-missinglink" % "0.3.6")
addSbtPlugin("ch.epfl.scala" % "sbt-scalafix" % "0.11.0")
addSbtPlugin("com.codecommit" % "sbt-github-packages" % "0.5.3")
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "1.2.0")
addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.11.0")
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.2.16")
addSbtPlugin("com.github.cb372" % "sbt-explicit-dependencies" % "0.3.1")
addSbtPlugin("com.github.sbt" % "sbt-release" % "1.1.0")
addSbtPlugin("de.heikoseeberger" % "sbt-header" % "5.9.0")
addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.4.1")
addSbtPlugin("de.heikoseeberger" % "sbt-header" % "5.10.0")
addSbtPlugin("org.typelevel" % "sbt-tpolecat" % "0.5.0")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.0")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.3")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "2.0.8")
298 changes: 152 additions & 146 deletions src/main/scala/org/broadinstitute/gpp/poolq3/PoolQConfig.scala

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,12 @@ object BarcodeSet {

def apply(file: Path): BarcodeSet =
Using.resource(new FileInputStream(file.toFile)) { fin =>
val in = new BOMInputStream(fin, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
val in = BOMInputStream
.builder()
.setInputStream(fin)
.setByteOrderMarks(ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
.setInclude(false)
.get()
val br = new BufferedReader(new InputStreamReader(in))
skipHeader(br, BarcodeRe)
br.lines()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,12 @@ object ReferenceData {

def apply(file: Path, quote: Char = '"'): ReferenceData = {
Using.resource(new FileInputStream(file.toFile)) { fin =>
val in = new BOMInputStream(fin, false, ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
val in = BOMInputStream
.builder()
.setInputStream(fin)
.setByteOrderMarks(ByteOrderMark.UTF_8, ByteOrderMark.UTF_16BE, ByteOrderMark.UTF_16LE)
.setInclude(false)
.get()
val br = new BufferedReader(new InputStreamReader(in))
val delimiter = guessDelimiter(br)
val config =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,8 @@ final class ScoringConsumer(
log.debug(s"Incrementing state for ($r, $c}).")
umiReference match {
case None =>
None
// we're not in UMI mode, so just increment the state
state.known.increment(None, (r, c))
val _ = state.known.increment(None, (r, c))
case Some(ref) =>
// we're in UMI mode
handleUmi(umi, ref, r, c)
Expand All @@ -182,7 +181,7 @@ final class ScoringConsumer(
val _ = state.known.increment(Some(u), (r, c))
} else {
// we found an unknown UMI barcode, so track it somehow
state.known.increment(None, (r, c))
val _ = state.known.increment(None, (r, c))
val _ = state.unknownUmi.increment(u)
}
case None =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ object UnexpectedSequenceWriter {
val colBc = fields(1)

// can't avoid the double hash lookup here without a big hassle
r.put(rowBc, r.getOrElseUpdate(rowBc, 0) + 1)
val _ = r.put(rowBc, r.getOrElseUpdate(rowBc, 0) + 1)
h.putIfAbsent(rowBc, new Object2IntOpenHashMap[String]())
h.get(rowBc).addTo(colBc, 1)
}
Expand All @@ -120,7 +120,7 @@ object UnexpectedSequenceWriter {
): Unit = {
val drop = r.toSeq.sortBy { case (_, count) => -count }.drop(n)
drop.foreach { case (bc, _) =>
r.remove(bc)
val _ = r.remove(bc)
h.remove(bc)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,50 +5,53 @@
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._
import munit.FunSuite

class BarcodePolicyTest extends AnyFlatSpec {
class BarcodePolicyTest extends FunSuite {

"BarcodePolicy" should "choose a fixed barcode policy" in {
BarcodePolicy("FIXED@0", 8, false) should be(FixedOffsetPolicy(0, 8, false))
test("fixed barcode policy") {
assertEquals(BarcodePolicy("FIXED@0", 8, false), FixedOffsetPolicy(0, 8, false))
// this is a deprecated option but needs to be supported for the time being
BarcodePolicy("FIXED:0", 8, false) should be(FixedOffsetPolicy(0, 8, false))
assertEquals(BarcodePolicy("FIXED:0", 8, false), FixedOffsetPolicy(0, 8, false))
}

it should "choose a known prefix barcode policy" in {
BarcodePolicy("PREFIX:CACCG@7", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, Some(7)))
BarcodePolicy("PREFIX:CACCG@7-9", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, Some(7), Some(9)))
BarcodePolicy("PREFIX:CACCG@-9", 20, false) should be(IndexOfKnownPrefixPolicy("CACCG", 20, None, Some(9)))
test("known prefix barcode policy") {
assertEquals(BarcodePolicy("PREFIX:CACCG@7", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, Some(7)))
assertEquals(BarcodePolicy("PREFIX:CACCG@7-9", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, Some(7), Some(9)))
assertEquals(BarcodePolicy("PREFIX:CACCG@-9", 20, false), IndexOfKnownPrefixPolicy("CACCG", 20, None, Some(9)))
}

it should "let the user specify a shorter length with a fixed policy" in {
BarcodePolicy("FIXED@0:6", 6, true) should be(FixedOffsetPolicy(0, 6, true))
test("specify a shorter length with a fixed policy") {
assertEquals(BarcodePolicy("FIXED@0:6", 6, true), FixedOffsetPolicy(0, 6, true))
// this is a deprecated option but needs to be supported for the time being
BarcodePolicy("FIXED:0:6", 6, true) should be(FixedOffsetPolicy(0, 6, true))
assertEquals(BarcodePolicy("FIXED:0:6", 6, true), FixedOffsetPolicy(0, 6, true))
}

it should "let the user specify a shorter length with a known prefix policy" in {
BarcodePolicy("PREFIX:CACCG@7:19", 19, false) should be(IndexOfKnownPrefixPolicy("CACCG", 19, Some(7)))
test("specify a shorter length with a known prefix policy") {
assertEquals(BarcodePolicy("PREFIX:CACCG@7:19", 19, false), IndexOfKnownPrefixPolicy("CACCG", 19, Some(7)))
}

it should "let the user specify a keymask policy" in {
BarcodePolicy("KEYMASK:caccgNNNNttNNNNaa@3", 8, false) should be(
test("keymask policy") {
assertEquals(
BarcodePolicy("KEYMASK:caccgNNNNttNNNNaa@3", 8, false),
GeneralTemplatePolicy(KeyMask("caccgNNNNttNNNNaa"), Some(3), None)
)
BarcodePolicy("TEMPLATE:caccgNNNNttNNNNaa@3", 8, false) should be(
assertEquals(
BarcodePolicy("TEMPLATE:caccgNNNNttNNNNaa@3", 8, false),
GeneralTemplatePolicy(KeyMask("caccgNNNNttNNNNaa"), Some(3), None)
)
}

it should "recognize a split barcode situation" in {
BarcodePolicy("TEMPLATE:caccgNNNNNnnnnnntatgcNNNNaa@3", 9, false) should be(
test("split barcode situation") {
assertEquals(
BarcodePolicy("TEMPLATE:caccgNNNNNnnnnnntatgcNNNNaa@3", 9, false),
SplitBarcodePolicy("CACCG", 5, 6, "TATGC", 4, Some(3), None)
)
}

it should "let the user specify just a 3' limit" in {
BarcodePolicy("TEMPLATE:NNNNNNNNNNNNNNNNNNNNNNN@-1", 23, false) should be(
test("specify just a 3' limit") {
assertEquals(
BarcodePolicy("TEMPLATE:NNNNNNNNNNNNNNNNNNNNNNN@-1", 23, false),
GeneralTemplatePolicy(KeyMask("NNNNNNNNNNNNNNNNNNNNNNN"), None, Some(1))
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ class KeyMaskTest extends AnyFlatSpec {

"KeyMask.apply" should "construct the correct key mask from a pattern" in {
val km0 = KeyMask("NNNNNNNNNNNNNNNNN")
km0 should be(KeyMask.fromString(17, "1-17"))
val _ = km0 should be(KeyMask.fromString(17, "1-17"))
val km1 = KeyMask("NNNNNNNNNNNNNNNNNnNN")
km1 should be(KeyMask.fromString(20, "1-17,19-20"))
val _ = km1 should be(KeyMask.fromString(20, "1-17,19-20"))
val km2 = KeyMask("nNNNNNNNNNNNNNNNNNnNNn")
km2 should be(KeyMask.fromString(22, "2-18,20-21"))
val _ = km2 should be(KeyMask.fromString(22, "2-18,20-21"))
val km3 = KeyMask("nnnNNNNNNNNNNNNNNNNNnNNnN")
km3 should be(KeyMask.fromString(25, "4-20,22-23,25"))
val _ = km3 should be(KeyMask.fromString(25, "4-20,22-23,25"))
val km4 = KeyMask("nnnnNNNNNNNNNNNNNNNNNNNNnnnnnn")
km4 should be(KeyMask.fromString(30, "5-24"))
}
Expand All @@ -30,14 +30,14 @@ class KeyMaskTest extends AnyFlatSpec {
val km5 = KeyMask(
"caccgNNNNNNNNNNNNNNNNNNNNnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnNNNNNNNNNNNNNNNNNNNNN"
)
km5.contextLength should be(240)
km5.keyLengthInBases should be(41)
val _ = km5.contextLength should be(240)
val _ = km5.keyLengthInBases should be(41)
km5.keyRanges should be(Seq(KeyRange(5, 24), KeyRange(219, 239)))
}

"KeyMask.fromString" should "compute the correct key mask from a list of key ranges in either syntax" in {
val km1 = KeyMask.fromString(23, "4-20,22-23")
km1 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
val _ = km1 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
val km2 = KeyMask.fromString(23, "4..20,22..23")
km2 should be(KeyMask(23, Seq(KeyRange(3, 19), KeyRange(21, 22))))
}
Expand All @@ -59,7 +59,7 @@ class KeyMaskTest extends AnyFlatSpec {
}

it should "merge adjacent ranges" in {
KeyMask.mergeAdjacent(Seq(KeyRange(1, 9), KeyRange(10, 12), KeyRange(14, 17))) should be(
val _ = KeyMask.mergeAdjacent(Seq(KeyRange(1, 9), KeyRange(10, 12), KeyRange(14, 17))) should be(
Seq(KeyRange(1, 12), KeyRange(14, 17))
)
KeyMask.fromString(10, "1,2..4,5,6-8,9") should be(KeyMask.fromString(10, "1-9"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,22 @@
*/
package org.broadinstitute.gpp.poolq3.barcode

import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers._
import munit.FunSuite

/** This class provides additional tests for the KeyMask that are not found in the FISHR codebase from which KeyMask and
* its primary test class were lifted. We will try not to modify the copied test classes to make subsequent updates
* from FISHR easier. Instead, new PoolQ-specific tests will live here.
*/
class KeyMaskTest2 extends AnyFlatSpec {
class KeyMaskTest2 extends FunSuite {

"KeyMask.apply" should "construct the correct key mask from a pattern" in {
test("construct the correct key mask from a pattern") {
// 0 1 2
// 12345678901234567890123456789
val km0 = KeyMask("caccgNNNNNnnnnnnnnnttacaNNNNN")

// parsing should work how we expect
km0.keyRanges should be(Seq(KeyRange(5, 9), KeyRange(24, 28)))
km0.keyLengthInBases should be(10)
assertEquals(km0.keyRanges, Seq(KeyRange(5, 9), KeyRange(24, 28)))
assertEquals(km0.keyLengthInBases, 10)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,32 +14,32 @@ import org.scalatest.matchers.should.Matchers._
class KeyRangeTest extends AnyFlatSpec {

"KeyRange" should "enforce well-formedness" in {
noException should be thrownBy KeyRange(3, 4)
noException should be thrownBy KeyRange(3, 3)
an[IllegalArgumentException] should be thrownBy KeyRange(3, 2)
val _ = noException should be thrownBy KeyRange(3, 4)
val _ = noException should be thrownBy KeyRange(3, 3)
val _ = an[IllegalArgumentException] should be thrownBy KeyRange(3, 2)
an[IllegalArgumentException] should be thrownBy KeyRange(-2, 2)
}

it should "have working compare()" in {
val ord = implicitly[Ordering[KeyRange]]
ord.compare(KeyRange(2, 5), KeyRange(2, 5)) should be(0)
KeyRange(2, 5) should be <= KeyRange(2, 5)
KeyRange(2, 5) should be >= KeyRange(2, 5)
val _ = ord.compare(KeyRange(2, 5), KeyRange(2, 5)) should be(0)
val _ = KeyRange(2, 5) should be <= KeyRange(2, 5)
val _ = KeyRange(2, 5) should be >= KeyRange(2, 5)

KeyRange(2, 5) should be < KeyRange(3, 4)
KeyRange(2, 5) should be < KeyRange(2, 6)
KeyRange(2, 5) should be > KeyRange(2, 4)
val _ = KeyRange(2, 5) should be < KeyRange(3, 4)
val _ = KeyRange(2, 5) should be < KeyRange(2, 6)
val _ = KeyRange(2, 5) should be > KeyRange(2, 4)
KeyRange(2, 5) should be > KeyRange(1, 32)
}

it should "be creatable from a string" in {
KeyRange("1-1") should be(KeyRange(0, 0))
KeyRange("1..1") should be(KeyRange(0, 0))
KeyRange("1") should be(KeyRange(0, 0))
KeyRange("1-6") should be(KeyRange(0, 5))
KeyRange("1..6") should be(KeyRange(0, 5))
an[IllegalArgumentException] should be thrownBy KeyRange("0-5")
an[IllegalArgumentException] should be thrownBy KeyRange("-1-5")
val _ = KeyRange("1-1") should be(KeyRange(0, 0))
val _ = KeyRange("1..1") should be(KeyRange(0, 0))
val _ = KeyRange("1") should be(KeyRange(0, 0))
val _ = KeyRange("1-6") should be(KeyRange(0, 5))
val _ = KeyRange("1..6") should be(KeyRange(0, 5))
val _ = an[IllegalArgumentException] should be thrownBy KeyRange("0-5")
val _ = an[IllegalArgumentException] should be thrownBy KeyRange("-1-5")
an[IllegalArgumentException] should be thrownBy KeyRange("6-5")
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,12 @@ class TemplatePolicyTest extends AnyFlatSpec {
val keymask = KeyMask(pattern)
val kmp = new GeneralTemplatePolicy(keymask, Some(0))

kmp.find(Read("", read1)) should be(Some(FoundBarcode("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5)))
kmp.find(Read("", read2)) should be(Some(FoundBarcode("NTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5)))
val _ = kmp.find(Read("", read1)) should be(
Some(FoundBarcode("TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5))
)
val _ = kmp.find(Read("", read2)) should be(
Some(FoundBarcode("NTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT".toCharArray, 5))
)
kmp.find(Read("", read3)) should be(None)
}

Expand All @@ -131,8 +135,8 @@ class TemplatePolicyTest extends AnyFlatSpec {
(variable: String, r1: String, ns: String, r2: String, rest: String) =>
val read = Read("id", variable + fixed + prefix1 + r1 + ns + prefix2 + r2 + rest)
// warm up phase
nanoTimed(100)(_ => kmp.find(read))
nanoTimed(100)(_ => kpp.find(read))
val _ = nanoTimed(100)(_ => kmp.find(read))
val _ = nanoTimed(100)(_ => kpp.find(read))

// go!
val (ret1, t1) = nanoTimed(10000)(_ => kmp.find(read))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ class OpenHashMapHistogramTest extends FunSuite with ScalaCheckSuite {
test("OpenHashMapHistogram should track frequencies") {
val h = new OpenHashMapHistogram[String]

h.increment("AAAA")
h.increment("AAAA")
h.increment("AAAA")
h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")
val _ = h.increment("AAAA")

assertEquals(h.count("AAAA"), 4)
assertEquals(h.count("CCCC"), 0)
}

property("OpenHashMapHistogram should track frequencies for arbitrary data") {
forAll { data: List[Int] =>
forAll { (data: List[Int]) =>
val expectedCounts: Map[Int, Int] = data.groupBy(identity).view.mapValues(_.length).toMap

val hist = new OpenHashMapHistogram[Int]
Expand Down
Loading

0 comments on commit 74df425

Please sign in to comment.