From b734a7e5c0b3de41d0b563d5642d6c58ca9ec0f7 Mon Sep 17 00:00:00 2001 From: James Hayhurst Date: Tue, 26 May 2026 16:52:04 +0100 Subject: [PATCH] added a memory-based LRU fetcher cache separate cache for each fetcher make cache method public so we can use in tests --- app/models/Backend.scala | 3 +- app/models/entities/Configuration.scala | 20 +++- app/models/gql/Cache.scala | 126 +++++++++++++++++++ app/models/gql/Fetchers.scala | 133 ++++++++++----------- conf/application.conf | 4 + test/controllers/CacheControllerTest.scala | 2 +- 6 files changed, 213 insertions(+), 75 deletions(-) create mode 100644 app/models/gql/Cache.scala diff --git a/app/models/Backend.scala b/app/models/Backend.scala index 1b9a31db..92a5846c 100644 --- a/app/models/Backend.scala +++ b/app/models/Backend.scala @@ -30,7 +30,7 @@ import models.entities.Studies.* import models.entities.Evidences.* import models.entities.SequenceOntologyTerm.* import models.entities.* -import models.gql.{InteractionSourceEnum, StudyTypeEnum} +import models.gql.{Fetchers, StudyTypeEnum, InteractionSourceEnum} import models.entities.Violations.{DateFilterError, InputParameterCheckError} import org.apache.http.impl.nio.reactor.IOReactorConfig import play.api.cache.AsyncCacheApi @@ -63,6 +63,7 @@ class Backend @Inject() (implicit markerContext = newMarkerContext implicit val defaultOTSettings: OTSettings = loadConfigurationObject[OTSettings]("ot", config) + Fetchers.configure(defaultOTSettings.cache) implicit val defaultESSettings: ElasticsearchSettings = defaultOTSettings.elasticsearch implicit val dbConfig: DatabaseConfig[ClickHouseProfile] = dbConfigProvider.get[ClickHouseProfile] diff --git a/app/models/entities/Configuration.scala b/app/models/entities/Configuration.scala index 8772c750..332fce6b 100644 --- a/app/models/entities/Configuration.scala +++ b/app/models/entities/Configuration.scala @@ -124,13 +124,16 @@ object Configuration { /** main Open Targets configuration object. It keeps track of meta, elasticsearch and clickhouse * configuration. */ + case class CacheSettings(fetcherMaxMb: Long) + case class OTSettings( meta: Meta, elasticsearch: ElasticsearchSettings, clickhouse: ClickhouseSettings, ignoreCache: Boolean, qValidationLimitNTerms: Int, - logging: Logging + logging: Logging, + cache: CacheSettings ) implicit val loggingJsonImp: OFormat[Logging] = Json.format[Logging] @@ -217,25 +220,34 @@ object Configuration { implicit val clickhouseSettingsJSONImp: OFormat[ClickhouseSettings] = Json.format[ClickhouseSettings] + implicit val cacheSettingsJSONImp: Reads[CacheSettings] = + (__ \ "fetcherMaxMb") + .read[String] + .map(s => CacheSettings(s.toLong)) + .orElse((__ \ "fetcherMaxMb").read[Long].map(CacheSettings.apply)) + implicit val otSettingsJSONImp: Reads[OTSettings] = ((__ \ "meta").read[Meta] and (__ \ "elasticsearch").read[ElasticsearchSettings] and (__ \ "clickhouse").read[ClickhouseSettings] and (__ \ "ignoreCache").read[String] and (__ \ "qValidationLimitNTerms").read[String] and - (__ \ "logging").read[Logging])( + (__ \ "logging").read[Logging] and + (__ \ "cache").read[CacheSettings])( (meta, elasticsearchSettings, clickhouseSettings, ignoreCache, qValidationLimitNTerms, - logging + logging, + cache ) => OTSettings.apply(meta, elasticsearchSettings, clickhouseSettings, ignoreCache.toBooleanOption.getOrElse(false), qValidationLimitNTerms.toInt, - logging + logging, + cache ) ) } diff --git a/app/models/gql/Cache.scala b/app/models/gql/Cache.scala new file mode 100644 index 00000000..618cf587 --- /dev/null +++ b/app/models/gql/Cache.scala @@ -0,0 +1,126 @@ +package models.gql + +import sangria.execution.deferred.FetcherCache +import com.github.benmanes.caffeine.cache.{Cache, Caffeine} +import scala.jdk.CollectionConverters._ + +/** LRU (Least Recently Used) cache implementation for FetcherCache using Caffeine. + * + * @param maxBytes + * Maximum total memory in bytes for each cache before eviction occurs + */ +class LruFetcherCache(maxBytes: Long = 256L * 1024 * 1024) extends FetcherCache { + + private def estimateBytes(key: Any, value: Any): Int = { + val size = (key.toString.length + value.toString.length) * 2 + math.max(1, math.min(size, Int.MaxValue)).toInt + } + + private def estimateBytesSeq(key: Any, values: Seq[Any]): Int = { + val size = key.toString.length * 2 + values.foldLeft(0)((acc, v) => acc + v.toString.length * 2) + math.max(1, math.min(size, Int.MaxValue)).toInt + } + + // Primary cache for entity lookups by ID + private val cache: Cache[Any, Any] = Caffeine + .newBuilder() + .maximumWeight(maxBytes) + .weigher[Any, Any]((k, v) => estimateBytes(k, v)) + // .recordStats() + .build[Any, Any]() + + // Cache for relationship lookups + private val relCache: Cache[Any, Seq[Any]] = Caffeine + .newBuilder() + .maximumWeight(maxBytes) + .weigher[Any, Seq[Any]]((k, v) => estimateBytesSeq(k, v)) + // .recordStats() + .build[Any, Seq[Any]]() + + def cacheKey(id: Any): Any = id + + def cacheKeyRel(rel: Any, relId: Any): Any = rel -> relId + + def cacheable(id: Any): Boolean = true + + def cacheableRel(rel: Any, relId: Any): Boolean = true + + def get(id: Any): Option[Any] = + Option(cache.getIfPresent(cacheKey(id))) + + def getRel(rel: Any, relId: Any): Option[Seq[Any]] = + Option(relCache.getIfPresent(cacheKeyRel(rel, relId))) + + def update(id: Any, value: Any): Unit = + if (cacheable(id)) { + cache.put(cacheKey(id), value) + } + + def updateRel[T](rel: Any, relId: Any, idFn: T => Any, values: Seq[T]): Unit = + if (cacheableRel(rel, relId)) { + values.foreach { v => + update(idFn(v), v) + } + relCache.put(cacheKeyRel(rel, relId), values) + } + + def clear(): Unit = { + cache.invalidateAll() + relCache.invalidateAll() + } + + override def clearId(id: Any): Unit = + cache.invalidate(cacheKey(id)) + + override def clearRel(rel: Any): Unit = { + // Iterate through all keys and remove matching relationships + val keysToRemove = relCache.asMap().keySet().asScala.filter { + case (r, _) => r == rel + case _ => false + } + relCache.invalidateAll(keysToRemove.asJava) + } + + override def clearRelId(rel: Any, relId: Any): Unit = + relCache.invalidate(cacheKeyRel(rel, relId)) + + /** Get current cache statistics. Note that recordStats needs to be uncommented if you want stats. + * It is commented out because there is a performance hit in recording stats. + */ + def stats(): CacheStats = { + val cacheStats = cache.stats() + val relCacheStats = relCache.stats() + CacheStats( + entityCacheSize = cache.estimatedSize(), + relCacheSize = relCache.estimatedSize(), + entityHitRate = cacheStats.hitRate(), + relHitRate = relCacheStats.hitRate(), + entityEvictionCount = cacheStats.evictionCount(), + relEvictionCount = relCacheStats.evictionCount() + ) + } +} + +case class CacheStats( + entityCacheSize: Long, + relCacheSize: Long, + entityHitRate: Double, + relHitRate: Double, + entityEvictionCount: Long, + relEvictionCount: Long +) { + override def toString: String = + s"entity[size=$entityCacheSize, hitRate=${f"$entityHitRate%.2f"}, evictions=$entityEvictionCount] " + + s"rel[size=$relCacheSize, hitRate=${f"$relHitRate%.2f"}, evictions=$relEvictionCount]" +} + +object LruFetcherCache { + + /** Create an LRU cache with default memory limit (256 MB per cache) + */ + def apply(): LruFetcherCache = new LruFetcherCache() + + /** Create an LRU cache with a custom memory limit in bytes + */ + def apply(maxBytes: Long): LruFetcherCache = new LruFetcherCache(maxBytes) +} diff --git a/app/models/gql/Fetchers.scala b/app/models/gql/Fetchers.scala index 032c96c7..025e6ecc 100644 --- a/app/models/gql/Fetchers.scala +++ b/app/models/gql/Fetchers.scala @@ -23,182 +23,199 @@ import models.entities.{ TargetPrioritisation, VariantIndex } +import models.entities.Configuration.CacheSettings import models.{Backend, entities} -import sangria.execution.deferred.{Fetcher, FetcherCache, FetcherConfig, HasId, SimpleFetcherCache} +import sangria.execution.deferred.{Fetcher, FetcherConfig, HasId} import utils.OTLogging import scala.concurrent.* object Fetchers extends OTLogging { - val soTermsFetcherCache = FetcherCache.simple + private var maxBytes: Long = 256L * 1024 * 1024 + + def configure(settings: CacheSettings): Unit = + maxBytes = settings.fetcherMaxMb * 1024 * 1024 + + private lazy val caches: Map[String, LruFetcherCache] = { + val names = Seq( + "soTerm", + "target", + "targetEssentiality", + "disease", + "expression", + "mechanismsOfAction", + "mousePhenotypes", + "otarProjects", + "biosample", + "hpo", + "drug", + "drugWarnings", + "go", + "variant", + "credibleSet", + "study", + "clinicalReport", + "targetPrioritisation", + "pharmacogenomicsByDrug", + "pharmacogenomicsByVariant", + "pharmacogenomicsByTarget" + ) + val perCacheBytes = maxBytes / names.size + names.map(n => n -> LruFetcherCache(perCacheBytes)).toMap + } + + def cacheFor(name: String): LruFetcherCache = caches(name) + implicit val soTermHasId: HasId[SequenceOntologyTerm, String] = HasId[SequenceOntologyTerm, String](_.id) val soTermsFetcher: Fetcher[Backend, SequenceOntologyTerm, SequenceOntologyTerm, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(soTermsFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("soTerm")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getSoTerms(ids) ) - val targetsFetcherCache = FetcherCache.simple // target implicit val targetHasId: HasId[Target, String] = HasId[Target, String](_.id) val targetsFetcher: Fetcher[Backend, Target, Target, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(targetsFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("target")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getTargets(ids) ) // target essentiality implicit val targetEssentialityHasId: HasId[TargetEssentiality, String] = HasId[TargetEssentiality, String](_.id) - val targetEssentialityFetcherCache = FetcherCache.simple val targetEssentialityFetcher: Fetcher[Backend, TargetEssentiality, TargetEssentiality, String] = Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(targetEssentialityFetcherCache), + .caching(cacheFor("targetEssentiality")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getTargetEssentiality(ids) ) // disease - val diseasesFetcherCache = FetcherCache.simple implicit val diseaseHasId: HasId[Disease, String] = HasId[Disease, String](_.id) val diseasesFetcher: Fetcher[Backend, Disease, Disease, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(diseasesFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("disease")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getDiseases(ids) ) - val expressionFetcherCache = FetcherCache.simple implicit val expressionHasId: HasId[Expressions, String] = HasId[Expressions, String](_.id) val expressionFetcher: Fetcher[Backend, Expressions, Expressions, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(expressionFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("expression")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getExpressions(ids) ) - val mechanismsOfActionFetcherCache = FetcherCache.simple implicit val mechanismsOfActionHasId: HasId[MechanismsOfAction, String] = HasId[MechanismsOfAction, String](_.chemblId) val mechanismsOfActionFetcher: Fetcher[Backend, MechanismsOfAction, MechanismsOfAction, String] = Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(mechanismsOfActionFetcherCache), + .caching(cacheFor("mechanismsOfAction")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getMechanismsOfAction(ids) ) - val mousePhenotypesFetcherCache = FetcherCache.simple implicit val mousePhenotypesHasId: HasId[MousePhenotypes, String] = HasId[MousePhenotypes, String](_.id) val mousePhenotypesFetcher: Fetcher[Backend, MousePhenotypes, MousePhenotypes, String] = Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(mousePhenotypesFetcherCache), + .caching(cacheFor("mousePhenotypes")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getMousePhenotypes(ids) ) - val otarProjectsFetcherCache = FetcherCache.simple implicit val otarProjectsHasId: HasId[OtarProjects, String] = HasId[OtarProjects, String](_.efoId) val otarProjectsFetcher: Fetcher[Backend, OtarProjects, OtarProjects, String] = Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(otarProjectsFetcherCache), + .caching(cacheFor("otarProjects")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getOtarProjects(ids) ) // hpo fetcher implicit val biosampleHasId: HasId[Biosample, String] = HasId[Biosample, String](_.biosampleId) - val biosamplesFetcherCache = FetcherCache.simple val biosamplesFetcher: Fetcher[Backend, Biosample, Biosample, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(biosamplesFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("biosample")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getBiosamples(ids) ) // hpo fetcher implicit val hpoHasId: HasId[HPO, String] = HasId[HPO, String](_.id) - val hpoFetcherCache = FetcherCache.simple val hposFetcher: Fetcher[Backend, HPO, HPO, String] = Fetcher( - config = FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(hpoFetcherCache), + config = FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("hpo")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getHPOs(ids) ) // drug implicit val drugHasId: HasId[Drug, String] = HasId[Drug, String](_.id) - val drugsFetcherCache = FetcherCache.simple val drugsFetcher: Fetcher[Backend, Drug, Drug, String] = Fetcher( - config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(drugsFetcherCache), + config = FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("drug")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getDrugs(ids) ) - val drugWarningsFetcherCache = FetcherCache.simple implicit val drugWarningsHasId: HasId[DrugWarnings, String] = HasId[DrugWarnings, String](_.chemblId) val drugWarningsFetcher: Fetcher[Backend, DrugWarnings, DrugWarnings, String] = Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(drugWarningsFetcherCache), + .caching(cacheFor("drugWarnings")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getDrugWarnings(ids) ) implicit val goFetcherId: HasId[GeneOntologyTerm, String] = HasId[GeneOntologyTerm, String](_.id) - val goFetcherCache = FetcherCache.simple val goFetcher: Fetcher[Backend, GeneOntologyTerm, GeneOntologyTerm, String] = Fetcher( - config = FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(goFetcherCache), + config = FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("go")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getGoTerms(ids) ) implicit val variantFetcherId: HasId[VariantIndex, String] = HasId[VariantIndex, String](_.variantId) - val variantFetcherCache = FetcherCache.simple val variantFetcher: Fetcher[Backend, VariantIndex, VariantIndex, String] = Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(variantFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("variant")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getVariants(ids) ) - val credibleSetFetcherCache = FetcherCache.simple val credibleSetFetcher: Fetcher[Backend, CredibleSet, CredibleSet, String] = { implicit val credibleSetFetcherId: HasId[CredibleSet, String] = HasId[CredibleSet, String](js => js.studyLocusId) Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(credibleSetFetcherCache), + .caching(cacheFor("credibleSet")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getCredibleSet(ids) ) } - val studyFetcherCache = FetcherCache.simple val studyFetcher: Fetcher[Backend, Study, Study, String] = { implicit val studyFetcherId: HasId[Study, String] = HasId[Study, String](js => (js.studyId)) Fetcher( config = - FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(studyFetcherCache), + FetcherConfig.maxBatchSize(entities.Configuration.batchSize).caching(cacheFor("study")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getStudy(ids) ) } - val clinicalReportFetcherCache = FetcherCache.simple val clinicalReportFetcher: Fetcher[Backend, ClinicalReport, ClinicalReport, String] = { implicit val clinicalreportFetcherId: HasId[ClinicalReport, String] = HasId[ClinicalReport, String](js => js.id) Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(clinicalReportFetcherCache), + .caching(cacheFor("clinicalReport")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getClinicalReports(ids) ) } - val targetPrioritisationFetcherCache = FetcherCache.simple implicit val targetPrioritisationHasId: HasId[TargetPrioritisation, String] = HasId[TargetPrioritisation, String](_.targetId) val targetPrioritisationFetcher @@ -206,11 +223,10 @@ object Fetchers extends OTLogging { Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(targetPrioritisationFetcherCache), + .caching(cacheFor("targetPrioritisation")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getTargetPrioritisation(ids) ) - val pharmacogenomicsByDrugFetcherCache = FetcherCache.simple implicit val pharmacogenomicsByDrugHasId: HasId[PharmacogenomicsByDrug, String] = HasId[PharmacogenomicsByDrug, String](js => js.drugId) val pharmacogenomicsByDrugFetcher @@ -218,11 +234,10 @@ object Fetchers extends OTLogging { Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(pharmacogenomicsByDrugFetcherCache), + .caching(cacheFor("pharmacogenomicsByDrug")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getPharmacogenomicsByDrug(ids) ) - val pharmacogenomicsByVariantFetcherCache = FetcherCache.simple implicit val pharmacogenomicsByVariantHasId: HasId[PharmacogenomicsByVariant, String] = HasId[PharmacogenomicsByVariant, String](js => js.variantId) val pharmacogenomicsByVariantFetcher @@ -230,11 +245,10 @@ object Fetchers extends OTLogging { Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(pharmacogenomicsByVariantFetcherCache), + .caching(cacheFor("pharmacogenomicsByVariant")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getPharmacogenomicsByVariant(ids) ) - val pharmacogenomicsByTargetFetcherCache = FetcherCache.simple implicit val pharmacogenomicsByTargetHasId: HasId[PharmacogenomicsByTarget, String] = HasId[PharmacogenomicsByTarget, String](js => js.targetFromSourceId) val pharmacogenomicsByTargetFetcher @@ -242,35 +256,16 @@ object Fetchers extends OTLogging { Fetcher( config = FetcherConfig .maxBatchSize(entities.Configuration.batchSize) - .caching(pharmacogenomicsByTargetFetcherCache), + .caching(cacheFor("pharmacogenomicsByTarget")), fetch = (ctx: Backend, ids: Seq[String]) => ctx.getPharmacogenomicsByTarget(ids) ) def resetCache(): Unit = { - logger.info("clearing all graphql caches") - val fetchers: List[SimpleFetcherCache] = List( - biosamplesFetcherCache, - credibleSetFetcherCache, - studyFetcherCache, - hpoFetcherCache, - goFetcherCache, - variantFetcherCache, - targetsFetcherCache, - drugsFetcherCache, - drugWarningsFetcherCache, - diseasesFetcherCache, - mechanismsOfActionFetcherCache, - mousePhenotypesFetcherCache, - expressionFetcherCache, - otarProjectsFetcherCache, - pharmacogenomicsByDrugFetcherCache, - pharmacogenomicsByVariantFetcherCache, - pharmacogenomicsByTargetFetcherCache, - soTermsFetcherCache, - targetEssentialityFetcherCache, - targetPrioritisationFetcherCache, - clinicalReportFetcherCache - ) - fetchers.foreach(_.clear()) + logger.info("clearing GraphQL cache") + caches.values.foreach(_.clear()) } + def cacheStats(): Unit = + logger.info( + s"Fetcher cache stats: ${caches.map { case (name, c) => s"$name: ${c.stats()}" }.mkString(", ")}" + ) } diff --git a/conf/application.conf b/conf/application.conf index b3be76ba..6fac8812 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -301,6 +301,10 @@ ot { } ignoreCache = "false" ignoreCache = ${?PLATFORM_API_IGNORE_CACHE} + cache { + fetcherMaxMb = 10240 // 10 GB default + fetcherMaxMb = ${?PLATFORM_API_FETCHER_CACHE_MAX_MB} + } qValidationLimitNTerms = "2000" qValidationLimitNTerms = ${?PLATFORM_API_MAX_QUERY_TERMS} } diff --git a/test/controllers/CacheControllerTest.scala b/test/controllers/CacheControllerTest.scala index a80ceaee..b65da618 100644 --- a/test/controllers/CacheControllerTest.scala +++ b/test/controllers/CacheControllerTest.scala @@ -43,7 +43,7 @@ class CacheControllerTest "be cleared" taggedAs IntegrationTestTag in { // given: a seeded cache - val drugCache = Fetchers.drugsFetcherCache + val drugCache = Fetchers.cacheFor("drug") val query = Json.parse( """{ "query": "query { drugs(chemblIds: [\"CHEMBL221959\", \"CHEMBL2103743\"]) { id } }"}"""