Skip to content

Commit 1b95e30

Browse files
committed
fix(ENGKNOW-3212): Fix refbases_with_build performance.
1 parent fa8a073 commit 1b95e30

4 files changed

Lines changed: 27 additions & 4 deletions

File tree

gortools/src/main/scala/gorsat/parser/GenomeFunctions.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import gorsat.parser.FunctionTypes.{dFun, iFun, sFun}
2727
import gorsat.parser.ParseUtilities._
2828
import gorsat.process.GorJavaUtilities
2929
import org.gorpipe.exceptions.GorParsingException
30+
import org.gorpipe.model.gor.iterators.RefSeq
3031

3132
object GenomeFunctions {
3233
def register(functions: FunctionRegistry): Unit = {
@@ -101,8 +102,12 @@ object GenomeFunctions {
101102
}
102103

103104
def refBases_with_build(owner: ParseArith, ex1: sFun, ex2: iFun, ex3: iFun, ex4: sFun): sFun = {
105+
var refSeq: RefSeq = null
104106
cvp => {
105-
owner.context.getSession.getProjectContext.createRefSeq(ex4(cvp)).getBases(ex1(cvp), ex2(cvp), ex3(cvp))
107+
if (refSeq == null) {
108+
refSeq = owner.context.getSession.getProjectContext.createRefSeq(ex4(cvp))
109+
}
110+
refSeq.getBases(ex1(cvp), ex2(cvp), ex3(cvp))
106111
}
107112
}
108113

gortools/src/main/scala/gorsat/parser/ParseArith.scala

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -995,9 +995,8 @@ class ParseArith(rs: GenomicIterator = null) extends JavaTokenParsers with Seria
995995
stringIfMatcher |
996996
genericFunction[sFun](FunctionTypes.StringFun) |
997997
"REFBASES_WITH_BUILD".ignoreCase ~ "(" ~ sexpr ~ "," ~ iexpr ~ "," ~ iexpr ~ "," ~ myStringLiteralFilename ~ ")" ^^ {
998-
case _ ~ "(" ~ ex1 ~ "," ~ ex2 ~ "," ~ ex3 ~ "," ~ ex4 ~ ")" => (line: ColumnValueProvider) => {
999-
refBases_with_build(this, ex1, ex2, ex3, (line: ColumnValueProvider) => ex4)(line)
1000-
}
998+
case _ ~ "(" ~ ex1 ~ "," ~ ex2 ~ "," ~ ex3 ~ "," ~ ex4 ~ ")" =>
999+
refBases_with_build(this, ex1, ex2, ex3, (line: ColumnValueProvider) => ex4)
10011000
} |
10021001
"GTSTAT".ignoreCase ~ "(" ~ sexpr ~ "," ~ iexpr ~ "," ~ sexpr ~ "," ~ sexpr ~ "," ~ iexpr ~ "," ~ sexpr ~ "," ~ sexpr ~ "," ~ iexpr ~ "," ~ sexpr ~ "," ~ sexpr ~ ")" ^^ {
10031002
case _ ~ "(" ~ ex1 ~ "," ~ ex2 ~ "," ~ ex3 ~ "," ~ ex4 ~ "," ~ ex5 ~ "," ~ ex6 ~ "," ~ ex7 ~ "," ~ ex8 ~ "," ~ ex9 ~ "," ~ ex10 ~ ")" => (line: ColumnValueProvider) => {

gortools/src/test/java/gorsat/parser/UTestGenomicFunctions.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,18 @@ public void testRefBasesWithBuild2() {
110110
TestUtils.assertCalculated("refbases_with_build('chr1', 1010101, 1010103, /private/gorkube-mount/csa/ref/hg19/chromSeq)", "ACC");
111111
}
112112

113+
@Ignore("Slow, and not needed for testin. Uses full build, which is not normally available")
114+
@Test
115+
public void testRefBasesWithBuildSpeed() {
116+
TestUtils.runGorPipeCount("gorrows -p chr1:100000-400000 | calc b refbases_with_build(chrom, pos, pos, /private/gorkube-mount/csa/ref/hg19/chromSeq)", ".");
117+
}
118+
119+
@Ignore("Slow, and not needed for testin. Uses full build, which is not normally available")
120+
@Test
121+
public void testRefBasesSpeed() {
122+
TestUtils.runGorPipeCount("gorrows -p chr1:100000-400000 | calc b refbases(chrom, pos, pos)", "-config", "/private/gorkube-mount/csa/ref/hg19/gor_config.txt");
123+
}
124+
113125
@Test
114126
public void testBamTag() {
115127
TestUtils.assertCalculated("bamtag('', '')", "NOT_FOUND");

model/src/main/scala/org/gorpipe/model/gor/iterators/RefSeqFromChromSeq.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,13 @@ object RefSeqFromChromSeq {
4141
new java.util.concurrent.ConcurrentHashMap[String, java.lang.Boolean]
4242
}
4343

44+
// IMPORTANT NOTE ON THREAD SAFETY:
45+
// This class is NOT thread safe. It is expected that each thread that needs to access reference sequence data
46+
// will have its own instance of this class.
47+
// 1. The LUFO cache is per se thread safe but it is very small so if used by many threads it will cause a lot
48+
// of thrasing and bad performance.
49+
// 2. The use of the filemap is NOT thread safe and is very likely to cause incorrect reads of the cache data.
50+
4451
class RefSeqFromChromSeq(ipath : String, fileReader : FileReader) extends RefSeq {
4552
private val GOR_REFSEQ_CACHE_FOLDER = System.getProperty("gor.refseq.cache.folder")
4653
private val GOR_REFSEQ_CACHE_DOWNLOAD = Option(System.getProperty("gor.refseq.cache.download", "true")).exists(_.toBoolean)

0 commit comments

Comments
 (0)