Skip to content
This repository was archived by the owner on Nov 15, 2024. It is now read-only.

Commit aecae0e

Browse files
committed
Merge branch 'develop'
2 parents c6468c0 + fc6ad11 commit aecae0e

108 files changed

Lines changed: 6302 additions & 413 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
.DS_Store
44
.artifactory
55
.idea/*
6+
**/.idea/*
67
.ensime_cache/*
78
.config/*
89
.local/*
@@ -47,3 +48,6 @@ private_docker_papi_v2_usa.options
4748
tesk_application_ftp.conf
4849
ftp_centaur_cwl_runner.conf
4950
tesk_application.conf
51+
**/__pycache__/
52+
**/venv/
53+
exome_germline_single_sample_v1.3/

.travis.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ env:
108108
BUILD_TYPE=dbms
109109
- >-
110110
BUILD_TYPE=singleWorkflowRunner
111+
- >-
112+
BUILD_TYPE=metadataComparisonPython
111113
script:
112114
- src/ci/bin/test.sh
113115
notifications:

CHANGELOG.md

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,37 @@
11
# Cromwell Change Log
22

3+
## 51 Release Notes
4+
5+
### Changes and Warnings
6+
7+
The configuration format for call cache blacklisting has been updated; please see the [call caching documentation](
8+
https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching) for details.
9+
10+
### Bug fixes
11+
12+
* Fixed a bug where the `size(...)` function did not work correctly on files
13+
from a shared filesystem if `size(...)` was called in the input section on a
14+
relative path.
15+
* Fixed a bug where the `use_relative_output_paths` option would not preserve intermediate folders.
16+
17+
### New functionality
18+
19+
#### Call caching blacklisting improvements
20+
21+
Cromwell previously supported blacklisting GCS buckets containing cache hits which could not be copied for permissions
22+
reasons. Cromwell now adds support for blacklisting individual cache hits which could not be copied for any reason,
23+
as well as grouping blacklist caches according to a workflow option key. More information is available in the [
24+
call caching documentation]( https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching).
25+
26+
#### New xxh64 and fingerprint strategies for call caching
27+
28+
The existing call caching strategies `path` and `path+modtime` don't work when using Docker on shared filesystems
29+
(SFS backend, i.e. not in cloud storage). The `file` (md5sum) strategy works, but uses a lot of resources.
30+
Two faster strategies have been added for this use case: `xxh64` and
31+
`fingerprint`. `xxh64` is a lightweight hashing algorithm, while `fingerprint` is a strategy designed to be very
32+
lightweight. Read more about them in the [call caching documentation](
33+
https://cromwell.readthedocs.io/en/stable/Configuring/#call-caching).
34+
335
## 50 Release Notes
436

537
### Changes and Warnings
@@ -11,7 +43,6 @@ Cromwell's metadata archival configuration has changed in a backwards incompatib
1143
please see
1244
[the updated documentation](https://cromwell.readthedocs.io/en/stable/Configuring#hybrid-metadata-storage-classic-carbonite) for details.
1345

14-
1546
## 49 Release Notes
1647

1748
### Changes and Warnings

backend/src/main/scala/cromwell/backend/BackendCacheHitCopyingActor.scala

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,19 @@ package cromwell.backend
33
import cromwell.backend.MetricableCacheCopyErrorCategory.MetricableCacheCopyErrorCategory
44
import cromwell.core.JobKey
55
import cromwell.core.simpleton.WomValueSimpleton
6+
import cromwell.services.CallCaching.CallCachingEntryId
67

78
object BackendCacheHitCopyingActor {
8-
final case class CopyOutputsCommand(womValueSimpletons: Seq[WomValueSimpleton], jobDetritusFiles: Map[String, String], returnCode: Option[Int])
9+
final case class CopyOutputsCommand(womValueSimpletons: Seq[WomValueSimpleton], jobDetritusFiles: Map[String, String], cacheHit: CallCachingEntryId, returnCode: Option[Int])
910

10-
final case class CopyingOutputsFailedResponse(jobKey: JobKey, cacheCopyAttempt: Int, failure: CacheCopyError)
11+
final case class CopyingOutputsFailedResponse(jobKey: JobKey, cacheCopyAttempt: Int, failure: CacheCopyFailure)
1112

12-
sealed trait CacheCopyError
13-
final case class LoggableCacheCopyError(failure: Throwable) extends CacheCopyError
14-
final case class MetricableCacheCopyError(failureCategory: MetricableCacheCopyErrorCategory) extends CacheCopyError
13+
sealed trait CacheCopyFailure
14+
/** A cache hit copy was attempted but failed. */
15+
final case class CopyAttemptError(failure: Throwable) extends CacheCopyFailure
16+
/** Copying was requested for a cache hit, but the cache hit copying actor found the hit had already
17+
* been blacklisted so no novel copy attempt was made. */
18+
final case class BlacklistSkip(failureCategory: MetricableCacheCopyErrorCategory) extends CacheCopyFailure
1519
}
1620

1721
object MetricableCacheCopyErrorCategory {
@@ -20,4 +24,5 @@ object MetricableCacheCopyErrorCategory {
2024
override def toString: String = getClass.getSimpleName.stripSuffix("$").toLowerCase
2125
}
2226
final case object BucketBlacklisted extends MetricableCacheCopyErrorCategory
27+
final case object HitBlacklisted extends MetricableCacheCopyErrorCategory
2328
}

backend/src/main/scala/cromwell/backend/standard/callcaching/BlacklistCache.scala

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,59 @@ package cromwell.backend.standard.callcaching
22

33
import com.google.common.cache.{CacheBuilder, CacheLoader}
44
import cromwell.core.CacheConfig
5+
import cromwell.services.CallCaching.CallCachingEntryId
56

6-
case class BlacklistCache(config: CacheConfig) {
7-
val cache = {
8-
// Queries to the blacklist cache return false by default (i.e. not blacklisted).
9-
val falseLoader = new CacheLoader[String, java.lang.Boolean]() {
10-
override def load(key: String): java.lang.Boolean = false
7+
sealed trait BlacklistStatus
8+
case object BadCacheResult extends BlacklistStatus
9+
case object GoodCacheResult extends BlacklistStatus
10+
case object UntestedCacheResult extends BlacklistStatus
11+
12+
sealed abstract class BlacklistCache(bucketCacheConfig: CacheConfig,
13+
hitCacheConfig: CacheConfig,
14+
val name: Option[String]) {
15+
val bucketCache = {
16+
// Queries to the bucket blacklist cache return UntestedCacheResult by default.
17+
val unknownLoader = new CacheLoader[String, BlacklistStatus]() {
18+
override def load(key: String): BlacklistStatus = UntestedCacheResult
19+
}
20+
21+
CacheBuilder.
22+
newBuilder().
23+
concurrencyLevel(bucketCacheConfig.concurrency).
24+
maximumSize(bucketCacheConfig.size).
25+
expireAfterWrite(bucketCacheConfig.ttl.length, bucketCacheConfig.ttl.unit).
26+
build[String, BlacklistStatus](unknownLoader)
27+
}
28+
29+
val hitCache = {
30+
// Queries to the hit blacklist cache return UntestedCacheResult by default (i.e. not blacklisted).
31+
val unknownLoader = new CacheLoader[CallCachingEntryId, BlacklistStatus]() {
32+
override def load(key: CallCachingEntryId): BlacklistStatus = UntestedCacheResult
1133
}
1234

1335
CacheBuilder.
1436
newBuilder().
15-
concurrencyLevel(config.concurrency).
16-
maximumSize(config.size).
17-
expireAfterWrite(config.ttl.length, config.ttl.unit).
18-
build[String, java.lang.Boolean](falseLoader)
37+
concurrencyLevel(hitCacheConfig.concurrency).
38+
maximumSize(hitCacheConfig.size).
39+
expireAfterWrite(hitCacheConfig.ttl.length, hitCacheConfig.ttl.unit).
40+
build[CallCachingEntryId, BlacklistStatus](unknownLoader)
1941
}
2042

21-
def isBlacklisted(bucket: String): Boolean = cache.get(bucket)
43+
def getBlacklistStatus(hit: CallCachingEntryId): BlacklistStatus = hitCache.get(hit)
2244

23-
def blacklist(bucket: String): Unit = cache.put(bucket, true)
45+
def getBlacklistStatus(bucket: String): BlacklistStatus = bucketCache.get(bucket)
46+
47+
def blacklist(hit: CallCachingEntryId): Unit = hitCache.put(hit, BadCacheResult)
48+
49+
def blacklist(bucket: String): Unit = bucketCache.put(bucket, BadCacheResult)
50+
51+
def whitelist(hit: CallCachingEntryId): Unit = hitCache.put(hit, GoodCacheResult)
52+
53+
def whitelist(bucket: String): Unit = bucketCache.put(bucket, GoodCacheResult)
2454
}
55+
56+
class RootWorkflowBlacklistCache(bucketCacheConfig: CacheConfig, hitCacheConfig: CacheConfig) extends
57+
BlacklistCache(bucketCacheConfig = bucketCacheConfig, hitCacheConfig = hitCacheConfig, name = None)
58+
59+
class GroupingBlacklistCache(bucketCacheConfig: CacheConfig, hitCacheConfig: CacheConfig, val group: String) extends
60+
BlacklistCache(bucketCacheConfig = bucketCacheConfig, hitCacheConfig = hitCacheConfig, name = Option(group))
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
package cromwell.backend.standard.callcaching
2+
3+
import akka.event.LoggingAdapter
4+
import com.google.common.cache.{CacheBuilder, CacheLoader, LoadingCache}
5+
import com.typesafe.config.Config
6+
import cromwell.core.{CacheConfig, HasWorkflowIdAndSources}
7+
import mouse.boolean._
8+
import net.ceedubs.ficus.Ficus._
9+
10+
import scala.concurrent.duration._
11+
import scala.language.postfixOps
12+
13+
object CallCachingBlacklistManager {
14+
object Defaults {
15+
object Groupings {
16+
val Concurrency = 10000
17+
val Size = 1000L
18+
val Ttl = 2 hours
19+
}
20+
object Hits {
21+
val Concurrency = 10000
22+
val Size = 20000L
23+
val Ttl = 1 hour
24+
}
25+
object Buckets {
26+
val Concurrency = 10000
27+
val Size = 1000L
28+
val Ttl = 1 hour
29+
}
30+
}
31+
}
32+
33+
class CallCachingBlacklistManager(rootConfig: Config, logger: LoggingAdapter) {
34+
35+
// Defined if "call-caching.blacklist-cache.enabled = true".
36+
private val blacklistCacheConfig: Option[Unit] =
37+
rootConfig.getOrElse("call-caching.blacklist-cache.enabled", false).option(())
38+
39+
// Defined if `blacklistCacheConfig` is defined and "call-caching.blacklist-cache.groupings.workflow-option" is defined.
40+
private val blacklistGroupingWorkflowOptionKey: Option[String] = for {
41+
_ <- blacklistCacheConfig // Only return a groupings cache if blacklisting is enabled.
42+
workflowOption <- rootConfig.as[Option[String]]("call-caching.blacklist-cache.groupings.workflow-option")
43+
} yield workflowOption
44+
45+
// Defined if `blacklistGroupingWorkflowOptionKey` is defined.
46+
private val blacklistGroupingCacheConfig: Option[CacheConfig] = {
47+
import CallCachingBlacklistManager.Defaults.Groupings._
48+
for {
49+
_ <- blacklistGroupingWorkflowOptionKey
50+
groupingsOption = rootConfig.as[Option[Config]] ("call-caching.blacklist-cache.groupings")
51+
conf = CacheConfig.config(groupingsOption, defaultConcurrency = Concurrency, defaultSize = Size, defaultTtl = Ttl)
52+
} yield conf
53+
}
54+
55+
// Defined if `blacklistCacheConfig` is defined.
56+
private val blacklistBucketCacheConfig: Option[CacheConfig] = {
57+
import CallCachingBlacklistManager.Defaults.Buckets._
58+
for {
59+
_ <- blacklistCacheConfig
60+
bucketsOption = rootConfig.as[Option[Config]]("call-caching.blacklist-cache.buckets")
61+
conf = CacheConfig.config(bucketsOption, defaultConcurrency = Concurrency, defaultSize = Size, defaultTtl = Ttl)
62+
} yield conf
63+
}
64+
65+
// Defined if `blacklistCacheConfig` is defined.
66+
private val blacklistHitCacheConfig: Option[CacheConfig] = {
67+
import CallCachingBlacklistManager.Defaults.Hits._
68+
for {
69+
_ <- blacklistCacheConfig
70+
hitsOption = rootConfig.as[Option[Config]]("call-caching.blacklist-cache.hits")
71+
conf = CacheConfig.config(hitsOption, defaultConcurrency = Concurrency, defaultSize = Size, defaultTtl = Ttl)
72+
} yield conf
73+
}
74+
75+
// If configuration allows, build a cache of blacklist groupings to BlacklistCaches.
76+
private val blacklistGroupingsCache: Option[LoadingCache[String, BlacklistCache]] = {
77+
def buildBlacklistGroupingsCache(groupingConfig: CacheConfig, bucketConfig: CacheConfig, hitConfig: CacheConfig): LoadingCache[String, BlacklistCache] = {
78+
val emptyBlacklistCacheLoader = new CacheLoader[String, BlacklistCache]() {
79+
override def load(key: String): BlacklistCache = new GroupingBlacklistCache(
80+
bucketCacheConfig = bucketConfig,
81+
hitCacheConfig = hitConfig,
82+
group = key
83+
)
84+
}
85+
86+
CacheBuilder.
87+
newBuilder().
88+
concurrencyLevel(groupingConfig.concurrency).
89+
maximumSize(groupingConfig.size).
90+
expireAfterWrite(groupingConfig.ttl.length, groupingConfig.ttl.unit).
91+
build[String, BlacklistCache](emptyBlacklistCacheLoader)
92+
}
93+
94+
for {
95+
groupingsConfig <- blacklistGroupingCacheConfig
96+
bucketsConfig <- blacklistBucketCacheConfig
97+
hitsConfig <- blacklistHitCacheConfig
98+
} yield buildBlacklistGroupingsCache(groupingsConfig, bucketsConfig, hitsConfig)
99+
}
100+
101+
/**
102+
* If configured return a group blacklist cache, otherwise if configured return a root workflow cache,
103+
* otherwise return nothing.
104+
*/
105+
def blacklistCacheFor(workflow: HasWorkflowIdAndSources): Option[BlacklistCache] = {
106+
// If configuration is set up for blacklist groups and a blacklist group is specified in workflow options,
107+
// get the BlacklistCache for the group.
108+
val groupBlacklistCache: Option[BlacklistCache] = for {
109+
groupings <- blacklistGroupingsCache
110+
groupKey <- blacklistGroupingWorkflowOptionKey
111+
groupFromWorkflowOptions <- workflow.sources.workflowOptions.get(groupKey).toOption
112+
} yield groupings.get(groupFromWorkflowOptions)
113+
114+
// Build a blacklist cache for a single, ungrouped root workflow.
115+
def rootWorkflowBlacklistCache: Option[BlacklistCache] = for {
116+
bucketConfig <- blacklistBucketCacheConfig
117+
hitConfig <- blacklistHitCacheConfig
118+
} yield new RootWorkflowBlacklistCache(bucketCacheConfig = bucketConfig, hitCacheConfig = hitConfig)
119+
120+
// Return the group blacklist cache if available, otherwise a blacklist cache for the root workflow.
121+
val maybeCache = groupBlacklistCache orElse rootWorkflowBlacklistCache
122+
maybeCache collect {
123+
case group: GroupingBlacklistCache =>
124+
logger.info("Workflow {} using group blacklist cache '{}' containing blacklist status for {} hits and {} buckets.",
125+
workflow.id, group.group, group.hitCache.size(), group.bucketCache.size())
126+
case _: RootWorkflowBlacklistCache =>
127+
logger.info("Workflow {} using root workflow blacklist cache.", workflow.id)
128+
}
129+
maybeCache
130+
}
131+
}

0 commit comments

Comments
 (0)