From f6b324fb1542b8227a9d84d4bfc79d4799035e16 Mon Sep 17 00:00:00 2001 From: James R Kane Date: Fri, 12 Dec 2025 13:47:17 -0600 Subject: [PATCH 1/6] feat: Add provenance tracking for haplogroup nodes and merge APIs - Add `provenance` column (JSONB) to `haplogroup` table for multi-source attribution; include GIN index for efficient querying. - Introduce `HaplogroupProvenance` domain model for tracking credits and merge info. - Implement `HaplogroupTreeMergeController` with endpoints for tree merges, subtree merges, and previews. - Add unit tests for provenance behavior and controller logic. - Enhance schema evolution with rollback support for `provenance` column. --- .../HaplogroupTreeMergeController.scala | 130 ++++ app/models/HaplogroupType.scala | 10 + .../api/haplogroups/TreeMergeModels.scala | 239 ++++++ app/models/dal/MyPostgresProfile.scala | 17 + .../domain/haplogroups/HaplogroupsTable.scala | 7 +- .../domain/haplogroups/Haplogroup.scala | 3 +- .../haplogroups/HaplogroupProvenance.scala | 102 +++ app/modules/ServicesModule.scala | 1 + .../HaplogroupCoreRepository.scala | 58 +- app/services/HaplogroupTreeMergeService.scala | 626 ++++++++++++++++ conf/evolutions/default/52.sql | 15 + conf/routes | 7 + .../HaplogroupTreeMergeControllerSpec.scala | 514 +++++++++++++ .../api/haplogroups/TreeMergeModelsSpec.scala | 528 +++++++++++++ .../HaplogroupProvenanceSpec.scala | 338 +++++++++ .../HaplogroupTreeMergeServiceSpec.scala | 702 ++++++++++++++++++ 16 files changed, 3293 insertions(+), 4 deletions(-) create mode 100644 app/controllers/HaplogroupTreeMergeController.scala create mode 100644 app/models/api/haplogroups/TreeMergeModels.scala create mode 100644 app/models/domain/haplogroups/HaplogroupProvenance.scala create mode 100644 app/services/HaplogroupTreeMergeService.scala create mode 100644 conf/evolutions/default/52.sql create mode 100644 test/controllers/HaplogroupTreeMergeControllerSpec.scala create mode 100644 
test/models/api/haplogroups/TreeMergeModelsSpec.scala create mode 100644 test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala create mode 100644 test/services/HaplogroupTreeMergeServiceSpec.scala diff --git a/app/controllers/HaplogroupTreeMergeController.scala b/app/controllers/HaplogroupTreeMergeController.scala new file mode 100644 index 0000000..41328ab --- /dev/null +++ b/app/controllers/HaplogroupTreeMergeController.scala @@ -0,0 +1,130 @@ +package controllers + +import actions.ApiSecurityAction +import jakarta.inject.{Inject, Singleton} +import models.api.haplogroups.* +import play.api.Logger +import play.api.libs.json.Json +import play.api.mvc.{Action, BaseController, ControllerComponents} +import services.HaplogroupTreeMergeService + +import scala.concurrent.ExecutionContext + +/** + * API controller for haplogroup tree merge operations. + * Secured with X-API-Key authentication. + * + * Endpoints: + * - POST /api/v1/manage/haplogroups/merge - Full tree merge + * - POST /api/v1/manage/haplogroups/merge/subtree - Subtree merge under anchor + * - POST /api/v1/manage/haplogroups/merge/preview - Preview merge without changes + */ +@Singleton +class HaplogroupTreeMergeController @Inject()( + val controllerComponents: ControllerComponents, + secureApi: ApiSecurityAction, + mergeService: HaplogroupTreeMergeService +)(implicit ec: ExecutionContext) extends BaseController { + + private val logger = Logger(this.getClass) + + /** + * Merge a full haplogroup tree, replacing the existing tree for the given type. 
+   *
+   * Request body: TreeMergeRequest
+   * - haplogroupType: "Y" or "MT"
+   * - sourceTree: Nested PhyloNodeInput tree structure
+   * - sourceName: Attribution source (e.g., "ytree.net", "ISOGG")
+   * - priorityConfig: Optional source priority ordering
+   * - conflictStrategy: Optional conflict resolution strategy
+   * - dryRun: If true, simulates merge without applying changes
+   *
+   * Responds 200 when the service reports success, 400 when it reports failure,
+   * and 500 with a generic message when the merge future fails with an exception.
+   */
+  def mergeFullTree(): Action[TreeMergeRequest] =
+    secureApi.jsonAction[TreeMergeRequest].async { request =>
+      val mergeRequest = request.body
+      val dryRunSuffix = if (mergeRequest.dryRun) " (dry run)" else ""
+      logger.info(
+        s"API: Full tree merge for ${mergeRequest.haplogroupType} from ${mergeRequest.sourceName}$dryRunSuffix"
+      )
+
+      mergeService
+        .mergeFullTree(mergeRequest)
+        .map { response =>
+          // Same payload either way; only the HTTP status reflects the outcome.
+          val payload = Json.toJson(response)
+          if (response.success) Ok(payload) else BadRequest(payload)
+        }
+        .recover { case e: Exception =>
+          logger.error(s"Tree merge failed: ${e.getMessage}", e)
+          val failure = Json.obj(
+            "success" -> false,
+            "message" -> "Merge operation failed",
+            "errors" -> List(e.getMessage)
+          )
+          InternalServerError(failure)
+        }
+    }
+
+  /**
+   * Merge a subtree under a specific anchor haplogroup.
+   *
+   * Request body: SubtreeMergeRequest
+   * - haplogroupType: "Y" or "MT"
+   * - anchorHaplogroupName: Name of the haplogroup to merge under
+   * - sourceTree: Nested PhyloNodeInput tree structure
+   * - sourceName: Attribution source
+   * - priorityConfig: Optional source priority ordering
+   * - conflictStrategy: Optional conflict resolution strategy
+   * - dryRun: If true, simulates merge without applying changes
+   *
+   * An IllegalArgumentException from the service is answered with 400 and the
+   * exception message; any other exception is answered with 500.
+   */
+  def mergeSubtree(): Action[SubtreeMergeRequest] =
+    secureApi.jsonAction[SubtreeMergeRequest].async { request =>
+      val mergeRequest = request.body
+      val dryRunSuffix = if (mergeRequest.dryRun) " (dry run)" else ""
+      logger.info(
+        s"API: Subtree merge under ${mergeRequest.anchorHaplogroupName} " +
+          s"for ${mergeRequest.haplogroupType} from ${mergeRequest.sourceName}$dryRunSuffix"
+      )
+
+      mergeService
+        .mergeSubtree(mergeRequest)
+        .map { response =>
+          val payload = Json.toJson(response)
+          if (response.success) Ok(payload) else BadRequest(payload)
+        }
+        .recover {
+          // Validation problems are the caller's fault, so they are reported as 400.
+          case e: IllegalArgumentException =>
+            logger.warn(s"Subtree merge validation failed: ${e.getMessage}")
+            val rejection = Json.obj(
+              "success" -> false,
+              "message" -> e.getMessage,
+              "errors" -> List(e.getMessage)
+            )
+            BadRequest(rejection)
+          case e: Exception =>
+            logger.error(s"Subtree merge failed: ${e.getMessage}", e)
+            val failure = Json.obj(
+              "success" -> false,
+              "message" -> "Merge operation failed",
+              "errors" -> List(e.getMessage)
+            )
+            InternalServerError(failure)
+        }
+    }
+
+  /**
+   * Preview a merge operation without applying changes.
+   *
+   * Request body: MergePreviewRequest
+   * - haplogroupType: "Y" or "MT"
+   * - anchorHaplogroupName: Optional anchor for subtree preview
+   * - sourceTree: Nested PhyloNodeInput tree structure
+   * - sourceName: Attribution source
+   * - priorityConfig: Optional source priority ordering
+   *
+   * Responds 200 with the preview, or 500 with "error"/"details" fields when the
+   * preview future fails with an exception.
+   */
+  def previewMerge(): Action[MergePreviewRequest] =
+    secureApi.jsonAction[MergePreviewRequest].async { request =>
+      logger.info(s"API: Preview merge for ${request.body.haplogroupType} from ${request.body.sourceName}" +
+        request.body.anchorHaplogroupName.map(a => s" under $a").getOrElse(""))
+
+      mergeService.previewMerge(request.body).map { response =>
+        Ok(Json.toJson(response))
+      }.recover { case e: Exception =>
+        logger.error(s"Merge preview failed: ${e.getMessage}", e)
+        InternalServerError(Json.obj(
+          "error" -> "Preview operation failed",
+          "details" -> e.getMessage
+        ))
+      }
+    }
+}
diff --git a/app/models/HaplogroupType.scala b/app/models/HaplogroupType.scala
index 4949cb6..e35d415 100644
--- a/app/models/HaplogroupType.scala
+++ b/app/models/HaplogroupType.scala
@@ -1,5 +1,6 @@
 package models
 
+import play.api.libs.json.{Format, JsError, JsSuccess, Reads, Writes}
 import play.api.mvc.QueryStringBindable
 
 /**
@@ -33,6 +34,22 @@ object HaplogroupType {
     case _ => None
   }
 
+  // JSON serialization
+  // Unknown strings are reported as a JsError (a normal validation failure) rather than
+  // by throwing, so callers using validate/asOpt never see an exception from this Reads.
+  implicit val reads: Reads[HaplogroupType] = Reads { json =>
+    json.validate[String].flatMap { str =>
+      fromString(str) match {
+        case Some(haplogroupType) => JsSuccess(haplogroupType)
+        case None => JsError(s"Invalid HaplogroupType: $str")
+      }
+    }
+  }
+
+  implicit val writes: Writes[HaplogroupType] = Writes.StringWrites.contramap(_.toString)
+
+  implicit val format: Format[HaplogroupType] = Format(reads, writes)
+
   implicit val queryStringBindable: QueryStringBindable[HaplogroupType] =
     new QueryStringBindable[HaplogroupType] {
       def bind(key: String, params: Map[String, Seq[String]]): Option[Either[String, HaplogroupType]] = {
diff --git a/app/models/api/haplogroups/TreeMergeModels.scala b/app/models/api/haplogroups/TreeMergeModels.scala
new file mode 100644
index 0000000..8b98eea
--- /dev/null
+++ b/app/models/api/haplogroups/TreeMergeModels.scala
@@ -0,0 +1,239 @@
+package models.api.haplogroups
+
+import models.HaplogroupType
+import play.api.libs.json.{Format, Json, OFormat, Reads, Writes}
+
+/**
+ * API DTOs for Haplogroup Tree Merge operations.
+ *
+ * Supports merging external haplogroup trees from sources like ISOGG, ytree.net,
+ * and other researchers into the DecodingUs baseline tree.
+ */
+
+// ============================================================================
+// Input Tree Structure
+// ============================================================================
+
+/**
+ * A node in the input phylogenetic tree for merging.
+ * Matching is done by variants, not names, to handle different naming conventions.
+ *
+ * Only `name` is required in JSON; every other field falls back to the default
+ * declared here when it is absent from the payload.
+ *
+ * @param name     node name as given by the source
+ * @param variants names of the variants attached to this node
+ * @param children child nodes; empty for a leaf
+ */
+case class PhyloNodeInput(
+  name: String,
+  variants: List[String] = List.empty,
+  formedYbp: Option[Int] = None,
+  formedYbpLower: Option[Int] = None,
+  formedYbpUpper: Option[Int] = None,
+  tmrcaYbp: Option[Int] = None,
+  tmrcaYbpLower: Option[Int] = None,
+  tmrcaYbpUpper: Option[Int] = None,
+  children: List[PhyloNodeInput] = List.empty
+)
+
+object PhyloNodeInput {
+  // Plain Json.format ignores case-class defaults, which would make `variants` and
+  // `children` mandatory on every node (including leaves). WithDefaultValues lets
+  // the declared defaults apply when a field is missing.
+  implicit val format: OFormat[PhyloNodeInput] =
+    Json.using[Json.WithDefaultValues].format[PhyloNodeInput]
+}
+
+// ============================================================================
+// Merge Configuration
+// ============================================================================
+
+/**
+ * Configuration for source priority during merge.
+ * Lower index = higher priority.
+ *
+ * @param sourcePriorities source names ordered from highest to lowest priority
+ * @param defaultPriority  defaults to 100 when omitted from JSON; presumably the
+ *                         priority given to sources not listed in `sourcePriorities`
+ *                         (TODO confirm against the service's getPriority)
+ */
+case class SourcePriorityConfig(
+  sourcePriorities: List[String],
+  defaultPriority: Int = 100
+)
+
+object SourcePriorityConfig {
+  // WithDefaultValues so that `defaultPriority` may be omitted from the payload.
+  implicit val format: OFormat[SourcePriorityConfig] =
+    Json.using[Json.WithDefaultValues].format[SourcePriorityConfig]
+}
+
+/**
+ * Strategy for handling conflicts during merge.
+ */
+sealed trait ConflictStrategy
+
+object ConflictStrategy {
+  case object HigherPriorityWins extends ConflictStrategy
+  case object KeepExisting extends ConflictStrategy
+  case object AlwaysUpdate extends ConflictStrategy
+
+  // Wire names: "higher_priority_wins", "keep_existing", "always_update".
+  // An unknown name yields a JsError instead of a thrown exception, so request
+  // validation fails cleanly rather than blowing up inside the JSON parser.
+  implicit val reads: Reads[ConflictStrategy] = Reads { json =>
+    json.validate[String].flatMap[ConflictStrategy] {
+      case "higher_priority_wins" => play.api.libs.json.JsSuccess(HigherPriorityWins)
+      case "keep_existing" => play.api.libs.json.JsSuccess(KeepExisting)
+      case "always_update" => play.api.libs.json.JsSuccess(AlwaysUpdate)
+      case other => play.api.libs.json.JsError(s"Unknown conflict strategy: $other")
+    }
+  }
+
+  implicit val writes: Writes[ConflictStrategy] = Writes.StringWrites.contramap {
+    case HigherPriorityWins => "higher_priority_wins"
+    case KeepExisting => "keep_existing"
+    case AlwaysUpdate => "always_update"
+  }
+
+  implicit val format: Format[ConflictStrategy] = Format(reads, writes)
+}
+
+// ============================================================================
+// Request DTOs
+// ============================================================================
+
+/**
+ * Request for full tree merge (replace entire Y-DNA or mtDNA tree).
+ *
+ * `priorityConfig`, `conflictStrategy` and `dryRun` may be omitted from JSON;
+ * `dryRun` then defaults to false.
+ */
+case class TreeMergeRequest(
+  haplogroupType: HaplogroupType,
+  sourceTree: PhyloNodeInput,
+  sourceName: String,
+  priorityConfig: Option[SourcePriorityConfig] = None,
+  conflictStrategy: Option[ConflictStrategy] = None,
+  dryRun: Boolean = false
+)
+
+object TreeMergeRequest {
+  // Plain Json.format would require `dryRun` in every payload despite its default.
+  implicit val format: OFormat[TreeMergeRequest] =
+    Json.using[Json.WithDefaultValues].format[TreeMergeRequest]
+}
+
+/**
+ * Request for subtree merge (merge under a specific anchor node).
+ *
+ * `priorityConfig`, `conflictStrategy` and `dryRun` may be omitted from JSON;
+ * `dryRun` then defaults to false.
+ */
+case class SubtreeMergeRequest(
+  haplogroupType: HaplogroupType,
+  anchorHaplogroupName: String,
+  sourceTree: PhyloNodeInput,
+  sourceName: String,
+  priorityConfig: Option[SourcePriorityConfig] = None,
+  conflictStrategy: Option[ConflictStrategy] = None,
+  dryRun: Boolean = false
+)
+
+object SubtreeMergeRequest {
+  // Plain Json.format would require `dryRun` in every payload despite its default.
+  implicit val format: OFormat[SubtreeMergeRequest] =
+    Json.using[Json.WithDefaultValues].format[SubtreeMergeRequest]
+}
+
+/**
+ * Request for merge preview.
+ */
+case class MergePreviewRequest(
+  haplogroupType: HaplogroupType,
+  anchorHaplogroupName: Option[String] = None,
+  sourceTree: PhyloNodeInput,
+  sourceName: String,
+  priorityConfig: Option[SourcePriorityConfig] = None
+)
+
+object MergePreviewRequest {
+  implicit val format: OFormat[MergePreviewRequest] = Json.format[MergePreviewRequest]
+}
+
+// ============================================================================
+// Response DTOs
+// ============================================================================
+
+/**
+ * Counters describing what a merge (or merge preview) touched.
+ */
+case class MergeStatistics(
+  nodesProcessed: Int,
+  nodesCreated: Int,
+  nodesUpdated: Int,
+  nodesUnchanged: Int,
+  variantsAdded: Int,
+  variantsUpdated: Int,
+  relationshipsCreated: Int,
+  relationshipsUpdated: Int,
+  splitOperations: Int = 0
+)
+
+object MergeStatistics {
+  implicit val format: OFormat[MergeStatistics] = Json.format[MergeStatistics]
+
+  /** Statistics with every counter at zero. */
+  val empty: MergeStatistics = MergeStatistics(
+    nodesProcessed = 0,
+    nodesCreated = 0,
+    nodesUpdated = 0,
+    nodesUnchanged = 0,
+    variantsAdded = 0,
+    variantsUpdated = 0,
+    relationshipsCreated = 0,
+    relationshipsUpdated = 0,
+    splitOperations = 0
+  )
+
+  /** Field-wise sum of two statistics records. */
+  def combine(a: MergeStatistics, b: MergeStatistics): MergeStatistics = {
+    def total(counter: MergeStatistics => Int): Int = counter(a) + counter(b)
+
+    MergeStatistics(
+      nodesProcessed = total(_.nodesProcessed),
+      nodesCreated = total(_.nodesCreated),
+      nodesUpdated = total(_.nodesUpdated),
+      nodesUnchanged = total(_.nodesUnchanged),
+      variantsAdded = total(_.variantsAdded),
+      variantsUpdated = total(_.variantsUpdated),
+      relationshipsCreated = total(_.relationshipsCreated),
+      relationshipsUpdated = total(_.relationshipsUpdated),
+      splitOperations = total(_.splitOperations)
+    )
+  }
+}
+
+/**
+ * Details of a conflict encountered during merge.
+ *
+ * All values are carried as strings; `resolution` records how the conflict was
+ * settled (the merge service in this patch writes "updated" or "kept_existing").
+ */
+case class MergeConflict(
+  haplogroupName: String,
+  field: String,
+  existingValue: String,
+  newValue: String,
+  resolution: String,
+  existingSource: String,
+  newSource: String
+)
+
+object MergeConflict {
+  implicit val format: OFormat[MergeConflict] = Json.format[MergeConflict]
+}
+
+/**
+ * Details of a split operation performed during merge.
+ */
+case class SplitOperation(
+  parentName: String,
+  newIntermediateName: String,
+  variantsRedistributed: List[String],
+  childrenReassigned: List[String],
+  source: String
+)
+
+object SplitOperation {
+  implicit val format: OFormat[SplitOperation] = Json.format[SplitOperation]
+}
+
+/**
+ * Result of a merge operation.
+ *
+ * `conflicts`, `splits` and `errors` default to empty lists when constructed in code.
+ */
+case class TreeMergeResponse(
+  success: Boolean,
+  message: String,
+  statistics: MergeStatistics,
+  conflicts: List[MergeConflict] = List.empty,
+  splits: List[SplitOperation] = List.empty,
+  errors: List[String] = List.empty
+)
+
+object TreeMergeResponse {
+  implicit val format: OFormat[TreeMergeResponse] = Json.format[TreeMergeResponse]
+
+  /**
+   * Build an unsuccessful response with zeroed statistics and no conflicts or splits.
+   *
+   * @param message human-readable summary of the failure
+   * @param errors  individual error messages; empty by default
+   */
+  def failure(message: String, errors: List[String] = List.empty): TreeMergeResponse =
+    TreeMergeResponse(
+      success = false,
+      message = message,
+      statistics = MergeStatistics.empty,
+      errors = errors
+    )
+}
+
+/**
+ * Preview of merge results (without applying changes).
+ */
+case class MergePreviewResponse(
+  statistics: MergeStatistics,
+  conflicts: List[MergeConflict],
+  splits: List[SplitOperation],
+  newNodes: List[String],
+  updatedNodes: List[String],
+  unchangedNodes: List[String]
+)
+
+object MergePreviewResponse {
+  implicit val format: OFormat[MergePreviewResponse] = Json.format[MergePreviewResponse]
+}
diff --git a/app/models/dal/MyPostgresProfile.scala b/app/models/dal/MyPostgresProfile.scala
index fea3530..7112406 100644
--- a/app/models/dal/MyPostgresProfile.scala
+++ b/app/models/dal/MyPostgresProfile.scala
@@ -310,6 +310,27 @@ trait MyPostgresProfile extends ExPostgresProfile
     )
   }
 
+  // --- Haplogroup Provenance JSONB Type Mapper ---
+  // NOTE(review): None is written as a JSON null value, not as SQL NULL. Confirm how a
+  // column that is SQL NULL is surfaced to this mapper before relying on it for old rows.
+  import models.domain.haplogroups.HaplogroupProvenance
+
+  implicit val haplogroupProvenanceJsonbTypeMapper: JdbcType[Option[HaplogroupProvenance]] with BaseTypedType[Option[HaplogroupProvenance]] = {
+    import play.api.libs.json.{JsNull, JsObject}
+    MappedJdbcType.base[Option[HaplogroupProvenance], JsValue](
+      {
+        case Some(prov) => Json.toJson(prov)
+        case None => JsNull
+      },
+      {
+        // JSON null and an empty object both mean "no provenance recorded".
+        case JsNull => None
+        case obj: JsObject if obj.value.isEmpty => None
+        case other => Some(other.as[HaplogroupProvenance])
+      }
+    )
+  }
+
   // Declare the name of an aggregate function:
   val ArrayAgg = new SqlAggregateFunction("array_agg")
 
diff --git a/app/models/dal/domain/haplogroups/HaplogroupsTable.scala b/app/models/dal/domain/haplogroups/HaplogroupsTable.scala
index 1a71964..ab8e018 100644
--- a/app/models/dal/domain/haplogroups/HaplogroupsTable.scala
+++ b/app/models/dal/domain/haplogroups/HaplogroupsTable.scala
@@ -2,7 +2,7 @@ package models.dal.domain.haplogroups
 
 import models.HaplogroupType
 import models.dal.MyPostgresProfile.api.*
-import models.domain.haplogroups.Haplogroup
+import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance}
 import slick.ast.TypedType
 import slick.lifted.{MappedProjection, ProvenShape}
 
@@ -71,8 +71,11 @@ class
HaplogroupsTable(tag: Tag) extends Table[Haplogroup](tag, Some("tree"), "h def ageEstimateSource = column[Option[String]]("age_estimate_source") + // Multi-source provenance tracking (JSONB) + def provenance = column[Option[HaplogroupProvenance]]("provenance") + def * = ( haplogroupId.?, name, lineage, description, haplogroupType, revisionId, source, confidenceLevel, validFrom, validUntil, - formedYbp, formedYbpLower, formedYbpUpper, tmrcaYbp, tmrcaYbpLower, tmrcaYbpUpper, ageEstimateSource + formedYbp, formedYbpLower, formedYbpUpper, tmrcaYbp, tmrcaYbpLower, tmrcaYbpUpper, ageEstimateSource, provenance ).mapTo[Haplogroup] } diff --git a/app/models/domain/haplogroups/Haplogroup.scala b/app/models/domain/haplogroups/Haplogroup.scala index 7b0ce5a..d177ffe 100644 --- a/app/models/domain/haplogroups/Haplogroup.scala +++ b/app/models/domain/haplogroups/Haplogroup.scala @@ -80,7 +80,8 @@ case class Haplogroup( tmrcaYbp: Option[Int] = None, tmrcaYbpLower: Option[Int] = None, tmrcaYbpUpper: Option[Int] = None, - ageEstimateSource: Option[String] = None + ageEstimateSource: Option[String] = None, + provenance: Option[HaplogroupProvenance] = None ) { /** Get formed date as AgeEstimate if available */ def formedEstimate: Option[AgeEstimate] = formedYbp.map(y => AgeEstimate(y, formedYbpLower, formedYbpUpper)) diff --git a/app/models/domain/haplogroups/HaplogroupProvenance.scala b/app/models/domain/haplogroups/HaplogroupProvenance.scala new file mode 100644 index 0000000..cb98a14 --- /dev/null +++ b/app/models/domain/haplogroups/HaplogroupProvenance.scala @@ -0,0 +1,102 @@ +package models.domain.haplogroups + +import play.api.libs.json.{Json, OFormat, Format, Reads, Writes} + +import java.time.LocalDateTime + +/** + * Tracks the provenance of a haplogroup node and its variants from multiple sources. 
+ *
+ * Credit assignment follows a tiered model:
+ * - ISOGG credit is preserved on existing nodes (authoritative backbone)
+ * - Incoming sources get credit for new splits and terminal branches they contribute
+ *
+ * @param primaryCredit Source with primary discovery credit for this node
+ * @param nodeProvenance All sources that have contributed to this node's existence
+ * @param variantProvenance Per-variant source attribution (variant name -> set of sources)
+ * @param lastMergedAt Timestamp of the most recent merge operation affecting this node
+ * @param lastMergedFrom Source of the most recent merge operation
+ */
+case class HaplogroupProvenance(
+  primaryCredit: String,
+  nodeProvenance: Set[String] = Set.empty,
+  variantProvenance: Map[String, Set[String]] = Map.empty,
+  lastMergedAt: Option[LocalDateTime] = None,
+  lastMergedFrom: Option[String] = None
+) {
+
+  /**
+   * Return a copy whose nodeProvenance also contains `source`.
+   */
+  def addNodeSource(source: String): HaplogroupProvenance = {
+    val contributors = nodeProvenance + source
+    copy(nodeProvenance = contributors)
+  }
+
+  /**
+   * Return a copy in which `source` is credited for `variantName`, creating the
+   * variant's entry when it has no sources yet.
+   */
+  def addVariantSource(variantName: String, source: String): HaplogroupProvenance = {
+    val creditedSources = variantProvenance.getOrElse(variantName, Set.empty[String]) + source
+    copy(variantProvenance = variantProvenance.updated(variantName, creditedSources))
+  }
+
+  /**
+   * Merge another provenance record into this one, combining all sources.
+   *
+   * The primary credit of this record is kept. `lastMergedAt` becomes the later of
+   * the two timestamps and `lastMergedFrom` is taken from the same record, so the
+   * pair always describes one merge. When this record is not strictly newer (equal
+   * or missing timestamps), `other` is preferred, falling back to this record.
+   */
+  def merge(other: HaplogroupProvenance): HaplogroupProvenance = {
+    val mergedVariants = (variantProvenance.keySet ++ other.variantProvenance.keySet).map { key =>
+      key -> (variantProvenance.getOrElse(key, Set.empty) ++ other.variantProvenance.getOrElse(key, Set.empty))
+    }.toMap
+
+    // Decide once which record holds the most recent merge so that the timestamp
+    // and the source name cannot come from different records.
+    val thisIsNewer = (lastMergedAt, other.lastMergedAt) match {
+      case (Some(mine), Some(theirs)) => mine.isAfter(theirs)
+      case (Some(_), None) => true
+      case _ => false
+    }
+
+    HaplogroupProvenance(
+      primaryCredit = this.primaryCredit, // Preserve existing primary credit
+      nodeProvenance = nodeProvenance ++ other.nodeProvenance,
+      variantProvenance = mergedVariants,
+      lastMergedAt = if (thisIsNewer) lastMergedAt else other.lastMergedAt.orElse(lastMergedAt),
+      lastMergedFrom =
+        if (thisIsNewer) lastMergedFrom.orElse(other.lastMergedFrom)
+        else other.lastMergedFrom.orElse(lastMergedFrom)
+    )
+  }
+
+  /**
+   * Update merge timestamp and source.
+   */
+  def withMergeInfo(source: String, timestamp: LocalDateTime): HaplogroupProvenance =
+    copy(lastMergedAt = Some(timestamp), lastMergedFrom = Some(source))
+}
+
+object HaplogroupProvenance {
+  // Custom JSON format to handle Set[String] and Map[String, Set[String]].
+  // Sets are written in sorted order so the stored JSON is stable across runs.
+  implicit val setStringFormat: Format[Set[String]] = Format(
+    Reads.seq[String].map(_.toSet),
+    Writes.seq[String].contramap(_.toSeq.sorted)
+  )
+
+  implicit val mapStringSetFormat: Format[Map[String, Set[String]]] = Format(
+    Reads.map[Set[String]],
+    Writes.map[Set[String]]
+  )
+
+  implicit val format: OFormat[HaplogroupProvenance] = Json.format[HaplogroupProvenance]
+
+  /** Provenance with an empty primary credit and no sources. */
+  val empty: HaplogroupProvenance = HaplogroupProvenance(primaryCredit = "")
+
+  /**
+   * Create initial provenance for a new node from a source.
+   *
+   * @param source    source credited for the node and for each of its variants
+   * @param variants  variant names to attribute to `source`
+   * @param timestamp merge time to record; defaults to the current time, and can be
+   *                  supplied so that one merge run stamps all its nodes identically
+   */
+  def forNewNode(
+    source: String,
+    variants: Seq[String] = Seq.empty,
+    timestamp: LocalDateTime = LocalDateTime.now()
+  ): HaplogroupProvenance = {
+    val variantProv = variants.map(v => v -> Set(source)).toMap
+    HaplogroupProvenance(
+      primaryCredit = source,
+      nodeProvenance = Set(source),
+      variantProvenance = variantProv,
+      lastMergedAt = Some(timestamp),
+      lastMergedFrom = Some(source)
+    )
+  }
+
+  /**
+   * Determine if ISOGG credit should be preserved (returns true if existing credit is ISOGG).
+   * The comparison ignores case.
+   */
+  def shouldPreserveCredit(existingCredit: String): Boolean =
+    existingCredit.equalsIgnoreCase("ISOGG")
+}
diff --git a/app/modules/ServicesModule.scala b/app/modules/ServicesModule.scala
index 7e83b97..f79ee3d 100644
--- a/app/modules/ServicesModule.scala
+++ b/app/modules/ServicesModule.scala
@@ -28,6 +28,7 @@ class ServicesModule(environment: Environment, configuration: Configuration) ext
 
     bind(classOf[services.PublicationDiscoveryService]).asEagerSingleton()
     bind(classOf[services.UserPermissionHelper]).asEagerSingleton()
+    bind(classOf[services.HaplogroupTreeMergeService]).asEagerSingleton()
   }
 }
 
diff --git a/app/repositories/HaplogroupCoreRepository.scala b/app/repositories/HaplogroupCoreRepository.scala
index d3dd40c..ca3e822 100644
--- a/app/repositories/HaplogroupCoreRepository.scala
+++ b/app/repositories/HaplogroupCoreRepository.scala
@@ -2,7 +2,7 @@ package repositories
 
 import jakarta.inject.Inject
 import models.HaplogroupType
-import models.domain.haplogroups.Haplogroup
+import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance}
 import play.api.Logging
 import play.api.db.slick.DatabaseConfigProvider
 import slick.jdbc.GetResult
@@ -117,6 +117,26 @@ trait HaplogroupCoreRepository {
    * @return a sequence of root haplogroups for that type
    */
   def findRoots(haplogroupType: HaplogroupType): Future[Seq[Haplogroup]]
+
+  // === Tree Merge Methods ===
+
+  /**
+   * Replace the provenance field of a haplogroup.
+   *
+   * @param id the haplogroup ID
+   * @param provenance the new provenance data
+   * @return true if at least one row was updated
+   */
+  def updateProvenance(id: Int, provenance: HaplogroupProvenance): Future[Boolean]
+
+  /**
+   * Get all haplogroups of a type with their associated variant names.
+   * Used for building variant-based lookup index for merge operations.
+   *
+   * @param haplogroupType the type of haplogroup (Y or MT)
+   * @return sequence of tuples: (haplogroup, list of variant names)
+   */
+  def getAllWithVariantNames(haplogroupType: HaplogroupType): Future[Seq[(Haplogroup, Seq[String])]]
 }
 
 class HaplogroupCoreRepositoryImpl @Inject()(
@@ -413,4 +433,46 @@ class HaplogroupCoreRepositoryImpl @Inject()(
 
     runQuery(query)
   }
+
+  // === Tree Merge Methods Implementation ===
+
+  // True when at least one row matched the id and was updated.
+  override def updateProvenance(id: Int, provenance: HaplogroupProvenance): Future[Boolean] = {
+    runQuery(
+      haplogroups
+        .filter(_.haplogroupId === id)
+        .map(_.provenance)
+        .update(Some(provenance))
+    ).map(_ > 0)
+  }
+
+  override def getAllWithVariantNames(haplogroupType: HaplogroupType): Future[Seq[(Haplogroup, Seq[String])]] = {
+    import models.dal.DatabaseSchema.domain.haplogroups.haplogroupVariants
+    import models.dal.DatabaseSchema.domain.genomics.variants
+
+    val haplogroupsOfType = activeHaplogroups.filter(_.haplogroupType === haplogroupType)
+
+    // One joined query for every (haplogroup id, variant name) pair replaces the
+    // per-haplogroup lookups, which issued one extra query for each haplogroup.
+    val variantNamePairs = for {
+      hg <- haplogroupsOfType
+      hv <- haplogroupVariants.filter(_.haplogroupId === hg.haplogroupId)
+      v <- variants.filter(_.variantId === hv.variantId)
+    } yield (hg.haplogroupId, v.commonName)
+
+    for {
+      hgList <- runQuery(haplogroupsOfType.result)
+      pairs <- runQuery(variantNamePairs.result)
+    } yield {
+      // Variants without a commonName are dropped, as before.
+      val namesByHaplogroupId = pairs
+        .collect { case (haplogroupId, Some(name)) => haplogroupId -> name }
+        .groupMap(_._1)(_._2)
+
+      // Haplogroups with no linked variants are kept with an empty name list.
+      hgList.map { hg =>
+        hg -> hg.id.flatMap(namesByHaplogroupId.get).getOrElse(Seq.empty)
+      }
+    }
+  }
 }
diff --git a/app/services/HaplogroupTreeMergeService.scala b/app/services/HaplogroupTreeMergeService.scala
new file mode 100644
index 0000000..f3a6f93
--- /dev/null
+++ b/app/services/HaplogroupTreeMergeService.scala
@@ -0,0 +1,626 @@
+package services
+
+import jakarta.inject.{Inject, Singleton}
+import models.HaplogroupType
+import models.api.haplogroups.*
+import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance}
+import
play.api.Logging +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantRepository} + +import java.time.LocalDateTime +import scala.concurrent.{ExecutionContext, Future} + +/** + * Service for merging external haplogroup trees into the DecodingUs baseline tree. + * + * Key features: + * - Variant-based matching: Nodes are matched by their defining variants, not names, + * to handle different naming conventions across sources (ytree.net, ISOGG, researchers) + * - Credit assignment: ISOGG credit preserved on existing nodes; incoming sources get + * credit for new splits and terminal branches they contribute + * - Multi-source provenance: Full attribution tracking via JSONB column + * - Branch split detection: Identifies when incoming data reveals finer tree structure + */ +@Singleton +class HaplogroupTreeMergeService @Inject()( + haplogroupRepository: HaplogroupCoreRepository, + haplogroupVariantRepository: HaplogroupVariantRepository, + variantRepository: VariantRepository +)(implicit ec: ExecutionContext) extends Logging { + + /** + * Merge a full tree, replacing the existing tree for the given haplogroup type. 
+   *
+   * A dry run is answered from a preview; otherwise the merge starts at the root
+   * (no anchor), with an empty priority list and HigherPriorityWins as defaults.
+   */
+  def mergeFullTree(request: TreeMergeRequest): Future[TreeMergeResponse] =
+    if (request.dryRun) {
+      previewAsDryRunResponse(MergePreviewRequest(
+        haplogroupType = request.haplogroupType,
+        anchorHaplogroupName = None,
+        sourceTree = request.sourceTree,
+        sourceName = request.sourceName,
+        priorityConfig = request.priorityConfig
+      ))
+    } else {
+      performMerge(
+        haplogroupType = request.haplogroupType,
+        anchorId = None,
+        sourceTree = request.sourceTree,
+        sourceName = request.sourceName,
+        priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty)),
+        conflictStrategy = request.conflictStrategy.getOrElse(ConflictStrategy.HigherPriorityWins)
+      )
+    }
+
+  /**
+   * Merge a subtree under a specific anchor haplogroup.
+   *
+   * For a real merge the anchor is looked up by name and type first; if it does not
+   * exist the returned future fails with an IllegalArgumentException. A dry run goes
+   * straight to the preview without that lookup.
+   */
+  def mergeSubtree(request: SubtreeMergeRequest): Future[TreeMergeResponse] =
+    if (request.dryRun) {
+      previewAsDryRunResponse(MergePreviewRequest(
+        haplogroupType = request.haplogroupType,
+        anchorHaplogroupName = Some(request.anchorHaplogroupName),
+        sourceTree = request.sourceTree,
+        sourceName = request.sourceName,
+        priorityConfig = request.priorityConfig
+      ))
+    } else {
+      haplogroupRepository
+        .getHaplogroupByName(request.anchorHaplogroupName, request.haplogroupType)
+        .flatMap {
+          case Some(anchor) =>
+            performMerge(
+              haplogroupType = request.haplogroupType,
+              anchorId = anchor.id,
+              sourceTree = request.sourceTree,
+              sourceName = request.sourceName,
+              priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty)),
+              conflictStrategy = request.conflictStrategy.getOrElse(ConflictStrategy.HigherPriorityWins)
+            )
+          case None =>
+            Future.failed(
+              new IllegalArgumentException(s"Anchor haplogroup '${request.anchorHaplogroupName}' not found")
+            )
+        }
+    }
+
+  /**
+   * Run a preview and wrap its statistics, conflicts and splits in a successful
+   * dry-run response. Shared by the dry-run paths of both merge entry points.
+   */
+  private def previewAsDryRunResponse(preview: MergePreviewRequest): Future[TreeMergeResponse] =
+    previewMerge(preview).map { outcome =>
+      TreeMergeResponse(
+        success = true,
+        message = "Dry run completed successfully",
+        statistics = outcome.statistics,
+        conflicts = outcome.conflicts,
+        splits = outcome.splits
+      )
+    }
+
+  /**
+   * Preview merge without applying changes: index the existing haplogroups of the
+   * requested type, then simulate the merge against that index.
+   */
+  def previewMerge(request: MergePreviewRequest): Future[MergePreviewResponse] =
+    buildVariantIndex(request.haplogroupType).flatMap { existingIndex =>
+      simulateMerge(
+        sourceTree = request.sourceTree,
+        sourceName = request.sourceName,
+        existingIndex = existingIndex,
+        priorityConfig = request.priorityConfig.getOrElse(SourcePriorityConfig(List.empty))
+      )
+    }
+
+  // ============================================================================
+  // Private Implementation
+  // ============================================================================
+
+  /**
+   * Index the existing haplogroups of a type two ways: upper-cased variant name to
+   * all haplogroups carrying that variant, and upper-cased haplogroup name to the
+   * haplogroup. This enables variant-based matching across naming conventions.
+   */
+  private def buildVariantIndex(haplogroupType: HaplogroupType): Future[VariantIndex] =
+    haplogroupRepository.getAllWithVariantNames(haplogroupType).map { haplogroupsWithVariants =>
+      val variantHaplogroupPairs = for {
+        (hg, variantNames) <- haplogroupsWithVariants
+        variantName <- variantNames
+      } yield variantName.toUpperCase -> hg
+
+      val byVariant = variantHaplogroupPairs.groupMap(_._1)(_._2)
+      val byName = haplogroupsWithVariants.map { case (hg, _) => hg.name.toUpperCase -> hg }.toMap
+
+      VariantIndex(byVariant, byName)
+    }
+
+  /**
+   * Perform the actual merge operation.
+   *
+   * Builds the variant index once, merges the source tree recursively starting under
+   * `anchorId`, and reports success only when no errors were accumulated.
+   */
+  private def performMerge(
+    haplogroupType: HaplogroupType,
+    anchorId: Option[Int],
+    sourceTree: PhyloNodeInput,
+    sourceName: String,
+    priorityConfig: SourcePriorityConfig,
+    conflictStrategy: ConflictStrategy
+  ): Future[TreeMergeResponse] = {
+    // A single timestamp is captured up front and carried in the merge context.
+    val context = MergeContext(
+      haplogroupType = haplogroupType,
+      sourceName = sourceName,
+      priorityConfig = priorityConfig,
+      conflictStrategy = conflictStrategy,
+      timestamp = LocalDateTime.now()
+    )
+
+    buildVariantIndex(haplogroupType)
+      .flatMap { existingIndex =>
+        mergeNode(
+          node = sourceTree,
+          parentId = anchorId,
+          context = context,
+          index = existingIndex,
+          accumulator = MergeAccumulator.empty
+        )
+      }
+      .map { outcome =>
+        val succeeded = outcome.errors.isEmpty
+        TreeMergeResponse(
+          success = succeeded,
+          message = if (succeeded) "Merge completed successfully" else "Merge completed with errors",
+          statistics = outcome.statistics,
+          conflicts = outcome.conflicts,
+          splits = outcome.splits,
+          errors = outcome.errors
+        )
+      }
+  }
+
+  /**
+   * Recursively merge a node and its children: a node that matches an existing
+   * haplogroup (by variants first, then by name) is merged into it, any other
+   * node is created.
+   */
+  private def mergeNode(
+    node: PhyloNodeInput,
+    parentId: Option[Int],
+    context: MergeContext,
+    index: VariantIndex,
+    accumulator: MergeAccumulator
+  ): Future[MergeAccumulator] =
+    findExistingMatch(node, index).fold(
+      createNewNode(node, parentId, context, index, accumulator)
+    ) { existing =>
+      mergeExistingNode(node, existing, parentId, context, index, accumulator)
+    }
+
+  /**
+   * Find an existing haplogroup that matches the input node.
+   * Primary matching is by variants; fallback is by name.
+   *
+   * When several existing haplogroups share the node's variants, the one sharing the
+   * most distinct variants wins. Ties are broken deterministically: a haplogroup whose
+   * name equals the node's name (case-insensitively) is preferred, then the
+   * lexicographically smallest name.
+   */
+  private def findExistingMatch(node: PhyloNodeInput, index: VariantIndex): Option[Haplogroup] = {
+    val nameKey = node.name.toUpperCase
+
+    // Count, per existing haplogroup, how many of the node's variants it carries.
+    // `distinct` keeps a variant that is listed twice in the input from being counted twice.
+    val matchCounts: Map[Haplogroup, Int] = node.variants
+      .map(_.toUpperCase)
+      .distinct
+      .flatMap(v => index.variantToHaplogroup.getOrElse(v, Seq.empty))
+      .groupMapReduce(identity)(_ => 1)(_ + _)
+
+    // Every entry has a count >= 1, so the best entry (if any) is a genuine variant match.
+    val bestByVariants = matchCounts.toSeq
+      .sortBy { case (hg, count) => (-count, hg.name.toUpperCase != nameKey, hg.name) }
+      .headOption
+      .map(_._1)
+
+    // Fallback: match by name
+    bestByVariants.orElse(index.haplogroupByName.get(nameKey))
+  }
+
+  /**
+   * Merge an input node with an existing haplogroup.
+   *
+   * Provenance is always updated. Age estimates are overwritten only when the conflict
+   * strategy favours the incoming source and the node carries age estimates. The node is
+   * counted as updated exactly when that age write was applied, otherwise as unchanged.
+   *
+   * `parentId` is not consulted: an existing haplogroup keeps its current parent, and its
+   * own id becomes the parent of the node's children.
+   */
+  private def mergeExistingNode(
+    node: PhyloNodeInput,
+    existing: Haplogroup,
+    parentId: Option[Int],
+    context: MergeContext,
+    index: VariantIndex,
+    accumulator: MergeAccumulator
+  ): Future[MergeAccumulator] = {
+    val existingSource = existing.provenance.map(_.primaryCredit).getOrElse(existing.source)
+
+    // Determine if we should update based on conflict strategy
+    val shouldUpdate = context.conflictStrategy match {
+      case ConflictStrategy.AlwaysUpdate => true
+      case ConflictStrategy.KeepExisting => false
+      case ConflictStrategy.HigherPriorityWins =>
+        getPriority(context.sourceName, context.priorityConfig) <
+          getPriority(existingSource, context.priorityConfig)
+    }
+
+    // A conflict is reported only when both sides state a formed age and the ages differ.
+    val formedConflict: Option[MergeConflict] = for {
+      existingFormed <- existing.formedYbp
+      incomingFormed <- node.formedYbp
+      if existingFormed != incomingFormed
+    } yield MergeConflict(
+      haplogroupName = existing.name,
+      field = "formedYbp",
+      existingValue = existingFormed.toString,
+      newValue = incomingFormed.toString,
+      resolution = if (shouldUpdate) "updated" else "kept_existing",
+      existingSource = existingSource,
+      newSource = context.sourceName
+    )
+
+    for {
+      // Update provenance to track this merge
+      _ <- updateProvenance(existing, node.variants, context)
+
+      // Update age estimates if applicable; the result says whether a write was applied.
+      agesWritten <- existing.id match {
+        case Some(id) if shouldUpdate && hasAgeEstimates(node) =>
+          updateAgeEstimates(id, node, context.sourceName)
+        case _ =>
+          Future.successful(false)
+      }
+
+      // Update statistics from what was actually written, not from conflict detection.
+      previous = accumulator.statistics
+      updatedStats = if (agesWritten) {
+        previous.copy(
+          nodesProcessed = previous.nodesProcessed + 1,
+          nodesUpdated = previous.nodesUpdated + 1
+        )
+      } else {
+        previous.copy(
+          nodesProcessed = previous.nodesProcessed + 1,
+          nodesUnchanged = previous.nodesUnchanged + 1
+        )
+      }
+
+      // Recursively process children
+      childrenResult <- processChildren(
+        children = node.children,
+        parentId = existing.id,
+        context = context,
+        index = index,
+        accumulator = accumulator.copy(
+          statistics = updatedStats,
+          conflicts = accumulator.conflicts ++ formedConflict
+        )
+      )
+    } yield childrenResult
+  }
+
+  /**
+   * Create a new haplogroup node.
+   *
+   * The incoming source is credited for the node. After the node is persisted its variants
+   * are associated with it, and the node's children are merged against an index that
+   * already contains the new haplogroup.
+   */
+  private def createNewNode(
+    node: PhyloNodeInput,
+    parentId: Option[Int],
+    context: MergeContext,
+    index: VariantIndex,
+    accumulator: MergeAccumulator
+  ): Future[MergeAccumulator] = {
+    val newHaplogroup = Haplogroup(
+      id = None,
+      name = node.name,
+      lineage = None,
+      description = None,
+      haplogroupType = context.haplogroupType,
+      revisionId = 1,
+      source = context.sourceName,
+      confidenceLevel = "medium",
+      validFrom = context.timestamp,
+      validUntil = None,
+      formedYbp = node.formedYbp,
+      formedYbpLower = node.formedYbpLower,
+      formedYbpUpper = node.formedYbpUpper,
+      tmrcaYbp = node.tmrcaYbp,
+      tmrcaYbpLower = node.tmrcaYbpLower,
+      tmrcaYbpUpper = node.tmrcaYbpUpper,
+      ageEstimateSource = Some(context.sourceName),
+      provenance = Some(HaplogroupProvenance.forNewNode(context.sourceName, node.variants))
+    )
+
+    for {
+      // Create the haplogroup with parent relationship
+      newId <- haplogroupRepository.createWithParent(newHaplogroup, parentId, context.sourceName)
+
+      // Associate variants with the new haplogroup
+      variantCount <- associateVariants(newId, node.variants)
+
+      // Update statistics; a relationship is only created when there is a parent to link to.
+      previous = accumulator.statistics
+      updatedStats = previous.copy(
+        nodesProcessed = previous.nodesProcessed + 1,
+        nodesCreated = previous.nodesCreated + 1,
+        variantsAdded = previous.variantsAdded + variantCount,
+        relationshipsCreated = previous.relationshipsCreated + (if (parentId.isDefined) 1 else 0)
+      )
+
+      // Update index with new haplogroup
+      created = newHaplogroup.copy(id = Some(newId))
+      updatedIndex = index.copy(
+        haplogroupByName = index.haplogroupByName + (node.name.toUpperCase -> created),
+        variantToHaplogroup = node.variants.foldLeft(index.variantToHaplogroup) { (idx, v) =>
+          idx.updatedWith(v.toUpperCase)(current => Some(current.getOrElse(Seq.empty) :+ created))
+        }
+      )
+
+      // Recursively process children
+      childrenResult <- processChildren(
+        children = node.children,
+        parentId = Some(newId),
+        context = context,
+        index = updatedIndex,
+        accumulator = accumulator.copy(statistics = updatedStats)
+      )
+    } yield childrenResult
+  }
+
+  /**
+   * Process child nodes recursively.
+   *
+   * Children are merged one after another, each receiving the accumulator produced by
+   * the previous one.
+   *
+   * NOTE(review): every child is merged against the same `index` snapshot; haplogroups
+   * created while merging an earlier sibling's subtree are not visible to later siblings.
+   */
+  private def processChildren(
+    children: List[PhyloNodeInput],
+    parentId: Option[Int],
+    context: MergeContext,
+    index: VariantIndex,
+    accumulator: MergeAccumulator
+  ): Future[MergeAccumulator] = {
+    children.foldLeft(Future.successful(accumulator)) { (accFuture, child) =>
+      accFuture.flatMap { acc =>
+        mergeNode(child, parentId, context, index, acc)
+      }
+    }
+  }
+
+  /**
+   * Update provenance for an existing haplogroup.
+   *
+   * The primary credit is never reassigned by a merge. The incoming source is added to
+   * the node's provenance and to the provenance of each incoming variant, and the merge
+   * timestamp and source are recorded.
+   *
+   * @return the repository's result for the provenance write; a failed future with
+   *         `NoSuchElementException` when `existing` has no id
+   */
+  private def updateProvenance(
+    existing: Haplogroup,
+    newVariants: List[String],
+    context: MergeContext
+  ): Future[Boolean] = {
+    // A haplogroup without stored provenance is attributed to its own `source`.
+    val existingProvenance = existing.provenance.getOrElse(
+      HaplogroupProvenance(primaryCredit = existing.source, nodeProvenance = Set(existing.source))
+    )
+
+    // Add new source to node provenance
+    val updatedNodeProv = existingProvenance.nodeProvenance + context.sourceName
+
+    // Add variant provenance for new variants
+    val updatedVariantProv = newVariants.foldLeft(existingProvenance.variantProvenance) { (prov, variant) =>
+      prov.updatedWith(variant) {
+        case Some(sources) => Some(sources + context.sourceName)
+        case None => Some(Set(context.sourceName))
+      }
+    }
+
+    val updatedProvenance = HaplogroupProvenance(
+      // Existing credit is kept for every source (ISOGG and non-ISOGG alike).
+      primaryCredit = existingProvenance.primaryCredit,
+      nodeProvenance = updatedNodeProv,
+      variantProvenance = updatedVariantProv,
+      lastMergedAt = Some(context.timestamp),
+      lastMergedFrom = Some(context.sourceName)
+    )
+
+    existing.id match {
+      case Some(id) =>
+        haplogroupRepository.updateProvenance(id, updatedProvenance)
+      case None =>
+        Future.failed(
+          new NoSuchElementException(
+            s"Cannot update provenance for haplogroup '${existing.name}': it has no id"
+          )
+        )
+    }
+  }
+
+  /**
+   * Update age estimates for a haplogroup.
+   *
+   * The haplogroup is re-read by id. Each age field supplied by `node` replaces the
+   * stored one, fields the node leaves empty keep their stored value, and the age
+   * estimate source is set to `sourceName`. Yields `false` when the id is not found.
+   */
+  private def updateAgeEstimates(
+    haplogroupId: Int,
+    node: PhyloNodeInput,
+    sourceName: String
+  ): Future[Boolean] = {
+    haplogroupRepository.findById(haplogroupId).flatMap { maybeStored =>
+      maybeStored.fold(Future.successful(false)) { stored =>
+        haplogroupRepository.update(
+          stored.copy(
+            formedYbp = node.formedYbp.orElse(stored.formedYbp),
+            formedYbpLower = node.formedYbpLower.orElse(stored.formedYbpLower),
+            formedYbpUpper = node.formedYbpUpper.orElse(stored.formedYbpUpper),
+            tmrcaYbp = node.tmrcaYbp.orElse(stored.tmrcaYbp),
+            tmrcaYbpLower = node.tmrcaYbpLower.orElse(stored.tmrcaYbpLower),
+            tmrcaYbpUpper = node.tmrcaYbpUpper.orElse(stored.tmrcaYbpUpper),
+            ageEstimateSource = Some(sourceName)
+          )
+        )
+      }
+    }
+  }
+
+  /**
+   * Associate already-known variants with a haplogroup.
+   *
+   * Each name is looked up through `variantRepository.searchByName`, and every hit that
+   * has an id is linked to the haplogroup. Names without hits contribute nothing. The
+   * result is the sum of the values returned by the individual link operations.
+   *
+   * NOTE(review): presumably `searchByName` is an exact-name lookup and
+   * `addVariantToHaplogroup` returns a row count - confirm against the repositories.
+   */
+  private def associateVariants(haplogroupId: Int, variantNames: List[String]): Future[Int] = {
+    // Link every stored variant found for one name and total the per-link results.
+    def linkByName(variantName: String): Future[Int] =
+      variantRepository.searchByName(variantName).flatMap { found =>
+        Future
+          .traverse(found) { variant =>
+            variant.variantId match {
+              case Some(vid) => haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, vid)
+              case None => Future.successful(0)
+            }
+          }
+          .map(_.sum)
+      }
+
+    if (variantNames.isEmpty) Future.successful(0)
+    else Future.traverse(variantNames)(linkByName).map(_.sum)
+  }
+
+  /**
+   * Get priority for a source (lower = higher priority).
+   *
+   * A listed source's priority is its position in `config.sourcePriorities`; an
+   * unlisted source gets `config.defaultPriority`.
+   */
+  private def getPriority(source: String, config: SourcePriorityConfig): Int = {
+    val position = config.sourcePriorities.indexOf(source)
+    if (position >= 0) position else config.defaultPriority
+  }
+
+  /**
+   * Check if node has any age estimates.
+   *
+   * Only the point estimates (`formedYbp`, `tmrcaYbp`) are considered; the lower and
+   * upper bounds are not.
+   */
+  private def hasAgeEstimates(node: PhyloNodeInput): Boolean =
+    Seq(node.formedYbp, node.tmrcaYbp).exists(_.isDefined)
+
+  /**
+   * Simulate merge without applying changes (for preview).
+   *
+   * The analysis itself is synchronous; the result is wrapped in an already-completed future.
+   */
+  private def simulateMerge(
+    sourceTree: PhyloNodeInput,
+    sourceName: String,
+    existingIndex: VariantIndex,
+    priorityConfig: SourcePriorityConfig
+  ): Future[MergePreviewResponse] = {
+    analyzeTree(sourceTree, existingIndex, sourceName, priorityConfig) match {
+      case (stats, conflicts, splits, created, updated, unchanged) =>
+        Future.successful(MergePreviewResponse(
+          statistics = stats,
+          conflicts = conflicts,
+          splits = splits,
+          newNodes = created,
+          updatedNodes = updated,
+          unchangedNodes = unchanged
+        ))
+    }
+  }
+
+  /**
+   * Analyze tree structure for preview without making changes.
+   *
+   * Returns, in order: statistics, conflicts, splits, names of new nodes, names of nodes
+   * that would be updated, and names of nodes that would stay unchanged. The node's own
+   * findings come first, followed by those of its children in input order. Existing
+   * nodes are judged by source priority only.
+   */
+  private def analyzeTree(
+    node: PhyloNodeInput,
+    index: VariantIndex,
+    sourceName: String,
+    priorityConfig: SourcePriorityConfig
+  ): (MergeStatistics, List[MergeConflict], List[SplitOperation], List[String], List[String], List[String]) = {
+    type Analysis =
+      (MergeStatistics, List[MergeConflict], List[SplitOperation], List[String], List[String], List[String])
+
+    // Findings for this node alone.
+    val own: Analysis = findExistingMatch(node, index) match {
+      case Some(existing) =>
+        val existingSource = existing.provenance.map(_.primaryCredit).getOrElse(existing.source)
+        val shouldUpdate = getPriority(sourceName, priorityConfig) < getPriority(existingSource, priorityConfig)
+
+        // A conflict exists only when both sides state a formed age and the ages differ.
+        val ownConflicts: List[MergeConflict] = (for {
+          existingFormed <- existing.formedYbp
+          incomingFormed <- node.formedYbp
+          if existingFormed != incomingFormed
+        } yield MergeConflict(
+          haplogroupName = existing.name,
+          field = "formedYbp",
+          existingValue = existingFormed.toString,
+          newValue = incomingFormed.toString,
+          resolution = if (shouldUpdate) "will_update" else "will_keep_existing",
+          existingSource = existingSource,
+          newSource = sourceName
+        )).toList
+
+        if (shouldUpdate && ownConflicts.nonEmpty) {
+          (MergeStatistics(1, 0, 1, 0, 0, 0, 0, 0, 0), ownConflicts, Nil, Nil, List(existing.name), Nil)
+        } else {
+          (MergeStatistics(1, 0, 0, 1, 0, 0, 0, 0, 0), ownConflicts, Nil, Nil, Nil, List(existing.name))
+        }
+
+      case None =>
+        (MergeStatistics(1, 1, 0, 0, node.variants.size, 0, 1, 0, 0), Nil, Nil, List(node.name), Nil, Nil)
+    }
+
+    // Fold each child's findings into the running result, keeping input order.
+    node.children.foldLeft(own) {
+      case ((stats, conflicts, splits, created, updated, unchanged), child) =>
+        val (childStats, childConflicts, childSplits, childNew, childUpdated, childUnchanged) =
+          analyzeTree(child, index, sourceName, priorityConfig)
+        (
+          MergeStatistics.combine(stats, childStats),
+          conflicts ++ childConflicts,
+          splits ++ childSplits,
+          created ++ childNew,
+          updated ++ childUpdated,
+          unchanged ++ childUnchanged
+        )
+    }
+  }
+}
+
+// ============================================================================
+// Internal Data Structures
+// ============================================================================
+
+/**
+ * Index of existing haplogroups for efficient lookup.
+ */
+private[services] case class VariantIndex(
+  variantToHaplogroup: Map[String, Seq[Haplogroup]],
+  haplogroupByName: Map[String, Haplogroup]
+)
+
+/**
+ * Context for merge operations.
+ */
+private[services] case class MergeContext(
+  haplogroupType: HaplogroupType,
+  sourceName: String,
+  priorityConfig: SourcePriorityConfig,
+  conflictStrategy: ConflictStrategy,
+  timestamp: LocalDateTime
+)
+
+/**
+ * Accumulator for merge statistics and results.
+ */ +private[services] case class MergeAccumulator( + statistics: MergeStatistics, + conflicts: List[MergeConflict], + splits: List[SplitOperation], + errors: List[String] +) + +private[services] object MergeAccumulator { + val empty: MergeAccumulator = MergeAccumulator( + statistics = MergeStatistics.empty, + conflicts = List.empty, + splits = List.empty, + errors = List.empty + ) +} diff --git a/conf/evolutions/default/52.sql b/conf/evolutions/default/52.sql new file mode 100644 index 0000000..44e581f --- /dev/null +++ b/conf/evolutions/default/52.sql @@ -0,0 +1,15 @@ +# --- !Ups + +-- Add provenance JSONB column to haplogroup table for multi-source attribution tracking +ALTER TABLE tree.haplogroup ADD COLUMN provenance JSONB; + +-- Add GIN index for efficient querying by provenance fields +CREATE INDEX idx_haplogroup_provenance ON tree.haplogroup USING GIN (provenance); + +-- Add comment for documentation +COMMENT ON COLUMN tree.haplogroup.provenance IS 'JSONB tracking node and variant provenance from multiple sources. 
Structure: {primaryCredit, nodeProvenance[], variantProvenance{}, lastMergedAt, lastMergedFrom}'; + +# --- !Downs + +DROP INDEX IF EXISTS tree.idx_haplogroup_provenance; +ALTER TABLE tree.haplogroup DROP COLUMN IF EXISTS provenance; diff --git a/conf/routes b/conf/routes index cbec6de..24aa699 100644 --- a/conf/routes +++ b/conf/routes @@ -184,6 +184,13 @@ PUT /api/v1/manage/str-markers/:id DELETE /api/v1/manage/str-markers/:id controllers.GenomeRegionsApiManagementController.deleteStrMarker(id: Int) POST /api/v1/manage/str-markers/bulk controllers.GenomeRegionsApiManagementController.bulkCreateStrMarkers() +# ============================================= +# Haplogroup Tree Merge API (X-API-Key secured) +# ============================================= +POST /api/v1/manage/haplogroups/merge controllers.HaplogroupTreeMergeController.mergeFullTree() +POST /api/v1/manage/haplogroups/merge/subtree controllers.HaplogroupTreeMergeController.mergeSubtree() +POST /api/v1/manage/haplogroups/merge/preview controllers.HaplogroupTreeMergeController.previewMerge() + # Curator Workflow GET /admin/publication-candidates controllers.PublicationCandidateController.listCandidates(page: Int ?= 1, pageSize: Int ?= 20) POST /admin/publication-candidates/:id/accept controllers.PublicationCandidateController.accept(id: Int) diff --git a/test/controllers/HaplogroupTreeMergeControllerSpec.scala b/test/controllers/HaplogroupTreeMergeControllerSpec.scala new file mode 100644 index 0000000..fe3c36c --- /dev/null +++ b/test/controllers/HaplogroupTreeMergeControllerSpec.scala @@ -0,0 +1,514 @@ +package controllers + +import actions.ApiSecurityAction +import models.HaplogroupType +import models.api.haplogroups.* +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.{reset, verify, when} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.concurrent.ScalaFutures +import org.scalatestplus.mockito.MockitoSugar +import org.scalatestplus.play.PlaySpec +import 
org.scalatestplus.play.guice.GuiceOneAppPerSuite +import play.api.Application +import play.api.inject.bind +import play.api.inject.guice.GuiceApplicationBuilder +import play.api.libs.json.Json +import play.api.mvc.Results +import play.api.test.Helpers.* +import play.api.test.{FakeRequest, Injecting} +import services.HaplogroupTreeMergeService + +import scala.concurrent.{ExecutionContext, Future} + +class HaplogroupTreeMergeControllerSpec extends PlaySpec + with GuiceOneAppPerSuite + with Injecting + with MockitoSugar + with ScalaFutures + with BeforeAndAfterEach { + + // Mock service + val mockMergeService: HaplogroupTreeMergeService = mock[HaplogroupTreeMergeService] + + override def fakeApplication(): Application = { + new GuiceApplicationBuilder() + .configure( + "play.evolutions.enabled" -> false, + "api.key.enabled" -> false // Disable API key for testing + ) + .overrides( + bind[HaplogroupTreeMergeService].toInstance(mockMergeService) + ) + .build() + } + + override def beforeEach(): Unit = { + reset(mockMergeService) + } + + // Test fixtures + def createSuccessResponse(nodesCreated: Int = 5): TreeMergeResponse = TreeMergeResponse( + success = true, + message = "Merge completed successfully", + statistics = MergeStatistics( + nodesProcessed = 10, + nodesCreated = nodesCreated, + nodesUpdated = 3, + nodesUnchanged = 2, + variantsAdded = 20, + variantsUpdated = 5, + relationshipsCreated = 4, + relationshipsUpdated = 1, + splitOperations = 0 + ) + ) + + def createPreviewResponse(): MergePreviewResponse = MergePreviewResponse( + statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), + conflicts = List.empty, + splits = List.empty, + newNodes = List("NewNode1", "NewNode2"), + updatedNodes = List("UpdatedNode1"), + unchangedNodes = List("UnchangedNode1") + ) + + "HaplogroupTreeMergeController" should { + + // ========================================================================= + // mergeFullTree endpoint tests + // 
========================================================================= + + "return 200 for successful full tree merge" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj( + "name" -> "R1b", + "variants" -> Json.arr("M269") + ), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + contentType(result) mustBe Some("application/json") + + val json = contentAsJson(result) + (json \ "success").as[Boolean] mustBe true + (json \ "statistics" \ "nodesCreated").as[Int] mustBe 5 + } + + "return 400 for failed merge" in { + val failureResponse = TreeMergeResponse.failure( + "Merge validation failed", + List("Invalid tree structure") + ) + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(failureResponse)) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj("name" -> "Invalid"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe BAD_REQUEST + val json = contentAsJson(result) + (json \ "success").as[Boolean] mustBe false + } + + "reject invalid haplogroup type in JSON body" in { + val requestBody = Json.obj( + "haplogroupType" -> "INVALID_TYPE", + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + // The JSON parsing throws an exception for invalid HaplogroupType + // 
which propagates through Play's JSON body parser + an[IllegalArgumentException] must be thrownBy { + val result = route(app, request).get + status(result) + } + } + + "return 400 for missing required fields" in { + val requestBody = Json.obj( + "haplogroupType" -> "Y" + // Missing sourceTree and sourceName + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe BAD_REQUEST + } + + "handle service exceptions gracefully" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.failed(new RuntimeException("Database connection failed"))) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe INTERNAL_SERVER_ERROR + val json = contentAsJson(result) + (json \ "success").as[Boolean] mustBe false + (json \ "errors").as[List[String]] must not be empty + } + + "pass through all request parameters to service" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj( + "name" -> "R1b", + "variants" -> Json.arr("M269"), + "formedYbp" -> 4500 + ), + "sourceName" -> "ytree.net", + "priorityConfig" -> Json.obj( + "sourcePriorities" -> Json.arr("ytree.net", "ISOGG") + ), + "conflictStrategy" -> "higher_priority_wins", + "dryRun" -> true + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + 
status(result) mustBe OK + verify(mockMergeService).mergeFullTree(any[TreeMergeRequest]) + } + + // ========================================================================= + // mergeSubtree endpoint tests + // ========================================================================= + + "return 200 for successful subtree merge" in { + when(mockMergeService.mergeSubtree(any[SubtreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "anchorHaplogroupName" -> "R1b", + "sourceTree" -> Json.obj( + "name" -> "R1b-L21", + "variants" -> Json.arr("L21") + ), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/subtree") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + val json = contentAsJson(result) + (json \ "success").as[Boolean] mustBe true + } + + "return 400 when anchor haplogroup not found" in { + when(mockMergeService.mergeSubtree(any[SubtreeMergeRequest])) + .thenReturn(Future.failed(new IllegalArgumentException("Anchor haplogroup 'NONEXISTENT' not found"))) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "anchorHaplogroupName" -> "NONEXISTENT", + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/subtree") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe BAD_REQUEST + val json = contentAsJson(result) + (json \ "message").as[String] must include("not found") + } + + "return 400 for missing anchorHaplogroupName" in { + val requestBody = Json.obj( + "haplogroupType" -> "Y", + // Missing anchorHaplogroupName + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, 
"/api/v1/manage/haplogroups/merge/subtree") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe BAD_REQUEST + } + + // ========================================================================= + // previewMerge endpoint tests + // ========================================================================= + + "return 200 for preview request" in { + when(mockMergeService.previewMerge(any[MergePreviewRequest])) + .thenReturn(Future.successful(createPreviewResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj( + "name" -> "R1b", + "variants" -> Json.arr("M269") + ), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + val json = contentAsJson(result) + (json \ "newNodes").as[List[String]] must contain("NewNode1") + (json \ "statistics" \ "nodesProcessed").as[Int] mustBe 10 + } + + "return preview with conflicts" in { + val previewWithConflicts = MergePreviewResponse( + statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), + conflicts = List( + MergeConflict( + haplogroupName = "R1b-L21", + field = "formedYbp", + existingValue = "4500", + newValue = "4800", + resolution = "will_update", + existingSource = "ISOGG", + newSource = "ytree.net" + ) + ), + splits = List.empty, + newNodes = List.empty, + updatedNodes = List("R1b-L21"), + unchangedNodes = List.empty + ) + + when(mockMergeService.previewMerge(any[MergePreviewRequest])) + .thenReturn(Future.successful(previewWithConflicts)) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj("name" -> "R1b-L21", "formedYbp" -> 4800), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, 
"/api/v1/manage/haplogroups/merge/preview") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + val json = contentAsJson(result) + (json \ "conflicts").as[List[MergeConflict]] must have size 1 + (json \ "conflicts" \ 0 \ "field").as[String] mustBe "formedYbp" + } + + "accept preview with optional anchor" in { + when(mockMergeService.previewMerge(any[MergePreviewRequest])) + .thenReturn(Future.successful(createPreviewResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "anchorHaplogroupName" -> "R1b", + "sourceTree" -> Json.obj("name" -> "R1b-L21"), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + } + + "handle preview service exceptions" in { + when(mockMergeService.previewMerge(any[MergePreviewRequest])) + .thenReturn(Future.failed(new RuntimeException("Index build failed"))) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge/preview") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe INTERNAL_SERVER_ERROR + } + + // ========================================================================= + // MT DNA tests + // ========================================================================= + + "handle MT DNA haplogroup type" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "MT", + "sourceTree" -> Json.obj( + "name" -> "H1", + "variants" -> 
Json.arr("H1-defining") + ), + "sourceName" -> "mtDNA-tree" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + } + + // ========================================================================= + // Complex tree structure tests + // ========================================================================= + + "handle deeply nested tree in request" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse(nodesCreated = 10))) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj( + "name" -> "R1b", + "variants" -> Json.arr("M269"), + "children" -> Json.arr( + Json.obj( + "name" -> "R1b-L21", + "variants" -> Json.arr("L21"), + "children" -> Json.arr( + Json.obj( + "name" -> "R1b-DF13", + "variants" -> Json.arr("DF13"), + "children" -> Json.arr( + Json.obj( + "name" -> "R1b-Z39589", + "variants" -> Json.arr("Z39589") + ) + ) + ) + ) + ) + ) + ), + "sourceName" -> "ytree.net" + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + } + + // ========================================================================= + // Dry run tests + // ========================================================================= + + "handle dry run request" in { + when(mockMergeService.mergeFullTree(any[TreeMergeRequest])) + .thenReturn(Future.successful(createSuccessResponse())) + + val requestBody = Json.obj( + "haplogroupType" -> "Y", + "sourceTree" -> Json.obj("name" -> "Test"), + "sourceName" -> "test", + "dryRun" -> true + ) + + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> 
"application/json") + .withJsonBody(requestBody) + + val result = route(app, request).get + + status(result) mustBe OK + } + + // ========================================================================= + // Content-Type tests + // ========================================================================= + + "return 415 for non-JSON content type" in { + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "text/plain") + .withBody("not json") + + val result = route(app, request).get + + status(result) mustBe UNSUPPORTED_MEDIA_TYPE + } + + "return 400 for malformed JSON" in { + val request = FakeRequest(POST, "/api/v1/manage/haplogroups/merge") + .withHeaders("Content-Type" -> "application/json") + .withBody("{invalid json") + + val result = route(app, request).get + + status(result) mustBe BAD_REQUEST + } + } +} diff --git a/test/models/api/haplogroups/TreeMergeModelsSpec.scala b/test/models/api/haplogroups/TreeMergeModelsSpec.scala new file mode 100644 index 0000000..80261a4 --- /dev/null +++ b/test/models/api/haplogroups/TreeMergeModelsSpec.scala @@ -0,0 +1,528 @@ +package models.api.haplogroups + +import models.HaplogroupType +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.must.Matchers +import play.api.libs.json.{JsError, JsSuccess, Json} + +/** JSON codec tests for the tree-merge API models. Request-side models (PhyloNodeInput, SourcePriorityConfig, ConflictStrategy, TreeMergeRequest, SubtreeMergeRequest, MergePreviewRequest) are parsed from literal JSON; PhyloNodeInput and ConflictStrategy are also serialized. Response-side models (MergeStatistics, MergeConflict, SplitOperation, TreeMergeResponse, MergePreviewResponse) are checked through Json.toJson. Also exercises MergeStatistics.empty, MergeStatistics.combine and TreeMergeResponse.failure. */ class TreeMergeModelsSpec extends AnyFunSpec with Matchers { + + describe("PhyloNodeInput") { + + describe("JSON serialization") { + + it("should deserialize a simple node") { + val json = Json.parse("""{ + "name": "R1b-L21", + "variants": ["L21", "S145"] + }""") + + json.validate[PhyloNodeInput] match { + case JsSuccess(node, _) => + node.name mustBe "R1b-L21" + node.variants mustBe List("L21", "S145") + node.children mustBe List.empty + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize node with all age fields") { + val json = Json.parse("""{ + "name": "R1b-L21", + "variants": ["L21"], + "formedYbp": 4500,
"formedYbpLower": 4200, + "formedYbpUpper": 4800, + "tmrcaYbp": 4000, + "tmrcaYbpLower": 3700, + "tmrcaYbpUpper": 4300 + }""") + + json.validate[PhyloNodeInput] match { + case JsSuccess(node, _) => + node.formedYbp mustBe Some(4500) + node.formedYbpLower mustBe Some(4200) + node.formedYbpUpper mustBe Some(4800) + node.tmrcaYbp mustBe Some(4000) + node.tmrcaYbpLower mustBe Some(3700) + node.tmrcaYbpUpper mustBe Some(4300) + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize nested children") { + val json = Json.parse("""{ + "name": "R1b-L21", + "variants": ["L21"], + "children": [ + { + "name": "R1b-DF13", + "variants": ["DF13"], + "children": [ + { + "name": "R1b-Z39589", + "variants": ["Z39589"] + } + ] + } + ] + }""") + + json.validate[PhyloNodeInput] match { + case JsSuccess(node, _) => + node.name mustBe "R1b-L21" + node.children must have size 1 + node.children.head.name mustBe "R1b-DF13" + node.children.head.children must have size 1 + node.children.head.children.head.name mustBe "R1b-Z39589" + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should serialize to JSON") { + val node = PhyloNodeInput( + name = "R1b-L21", + variants = List("L21", "S145"), + formedYbp = Some(4500), + children = List( + PhyloNodeInput(name = "R1b-DF13", variants = List("DF13")) + ) + ) + + val json = Json.toJson(node) + + (json \ "name").as[String] mustBe "R1b-L21" + (json \ "variants").as[List[String]] mustBe List("L21", "S145") + (json \ "formedYbp").as[Int] mustBe 4500 + (json \ "children").as[List[PhyloNodeInput]] must have size 1 + } + + /* Only "name" is present in the input; variants and children are both expected to come back as empty lists. */ it("should handle empty variants list") { + val json = Json.parse("""{"name": "Test"}""") + + json.validate[PhyloNodeInput] match { + case JsSuccess(node, _) => + node.variants mustBe List.empty + node.children mustBe List.empty + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + } + } + + describe("SourcePriorityConfig") { + + it("should deserialize with priority
list") { + val json = Json.parse("""{ + "sourcePriorities": ["ISOGG", "ytree.net", "DecodingUs"], + "defaultPriority": 50 + }""") + + json.validate[SourcePriorityConfig] match { + case JsSuccess(config, _) => + config.sourcePriorities mustBe List("ISOGG", "ytree.net", "DecodingUs") + config.defaultPriority mustBe 50 + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should use default priority of 100") { + val json = Json.parse("""{ + "sourcePriorities": ["ISOGG"] + }""") + + json.validate[SourcePriorityConfig] match { + case JsSuccess(config, _) => + config.defaultPriority mustBe 100 + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + } + + describe("ConflictStrategy") { + + it("should deserialize higher_priority_wins") { + val json = Json.parse("\"higher_priority_wins\"") + + json.validate[ConflictStrategy] match { + case JsSuccess(strategy, _) => + strategy mustBe ConflictStrategy.HigherPriorityWins + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize keep_existing") { + val json = Json.parse("\"keep_existing\"") + + json.validate[ConflictStrategy] match { + case JsSuccess(strategy, _) => + strategy mustBe ConflictStrategy.KeepExisting + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize always_update") { + val json = Json.parse("\"always_update\"") + + json.validate[ConflictStrategy] match { + case JsSuccess(strategy, _) => + strategy mustBe ConflictStrategy.AlwaysUpdate + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should fail for unknown strategy") { + val json = Json.parse("\"invalid_strategy\"") + + // The implementation throws an exception for invalid strategies + an[IllegalArgumentException] must be thrownBy { + json.as[ConflictStrategy] + } + } + + it("should serialize strategies correctly") { + Json.toJson[ConflictStrategy](ConflictStrategy.HigherPriorityWins).as[String] mustBe "higher_priority_wins" +
Json.toJson[ConflictStrategy](ConflictStrategy.KeepExisting).as[String] mustBe "keep_existing" + Json.toJson[ConflictStrategy](ConflictStrategy.AlwaysUpdate).as[String] mustBe "always_update" + } + } + + describe("TreeMergeRequest") { + + it("should deserialize a full merge request") { + val json = Json.parse("""{ + "haplogroupType": "Y", + "sourceTree": { + "name": "R1b", + "variants": ["M269"] + }, + "sourceName": "ytree.net", + "priorityConfig": { + "sourcePriorities": ["ytree.net", "ISOGG"] + }, + "conflictStrategy": "higher_priority_wins", + "dryRun": true + }""") + + json.validate[TreeMergeRequest] match { + case JsSuccess(request, _) => + request.haplogroupType mustBe HaplogroupType.Y + request.sourceTree.name mustBe "R1b" + request.sourceName mustBe "ytree.net" + request.priorityConfig mustBe defined + request.conflictStrategy mustBe Some(ConflictStrategy.HigherPriorityWins) + request.dryRun mustBe true + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + /* Optional fields are omitted: priorityConfig and conflictStrategy are expected to be None and dryRun to be false. */ it("should deserialize minimal merge request") { + val json = Json.parse("""{ + "haplogroupType": "MT", + "sourceTree": {"name": "H"}, + "sourceName": "test" + }""") + + json.validate[TreeMergeRequest] match { + case JsSuccess(request, _) => + request.haplogroupType mustBe HaplogroupType.MT + request.priorityConfig mustBe None + request.conflictStrategy mustBe None + request.dryRun mustBe false + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should fail for invalid haplogroup type") { + val json = Json.parse("""{ + "haplogroupType": "INVALID", + "sourceTree": {"name": "Test"}, + "sourceName": "test" + }""") + + // The implementation throws an exception for invalid haplogroup types + an[IllegalArgumentException] must be thrownBy { + json.as[TreeMergeRequest] + } + } + } + + describe("SubtreeMergeRequest") { + + it("should deserialize a subtree merge request") { + val json = Json.parse("""{ + "haplogroupType": "Y", + "anchorHaplogroupName": "R1b", + "sourceTree": { +
"name": "R1b-L21", + "variants": ["L21"] + }, + "sourceName": "ytree.net", + "dryRun": false + }""") + + json.validate[SubtreeMergeRequest] match { + case JsSuccess(request, _) => + request.haplogroupType mustBe HaplogroupType.Y + request.anchorHaplogroupName mustBe "R1b" + request.sourceTree.name mustBe "R1b-L21" + request.sourceName mustBe "ytree.net" + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + } + + describe("MergePreviewRequest") { + + it("should deserialize with optional anchor") { + val json = Json.parse("""{ + "haplogroupType": "Y", + "anchorHaplogroupName": "R1b", + "sourceTree": {"name": "Test"}, + "sourceName": "test" + }""") + + json.validate[MergePreviewRequest] match { + case JsSuccess(request, _) => + request.anchorHaplogroupName mustBe Some("R1b") + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize without anchor") { + val json = Json.parse("""{ + "haplogroupType": "Y", + "sourceTree": {"name": "Test"}, + "sourceName": "test" + }""") + + json.validate[MergePreviewRequest] match { + case JsSuccess(request, _) => + request.anchorHaplogroupName mustBe None + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + } + + describe("MergeStatistics") { + + it("should serialize all fields") { + val stats = MergeStatistics( + nodesProcessed = 100, + nodesCreated = 50, + nodesUpdated = 30, + nodesUnchanged = 20, + variantsAdded = 200, + variantsUpdated = 50, + relationshipsCreated = 49, + relationshipsUpdated = 10, + splitOperations = 5 + ) + + val json = Json.toJson(stats) + + (json \ "nodesProcessed").as[Int] mustBe 100 + (json \ "nodesCreated").as[Int] mustBe 50 + (json \ "nodesUpdated").as[Int] mustBe 30 + (json \ "nodesUnchanged").as[Int] mustBe 20 + (json \ "variantsAdded").as[Int] mustBe 200 + (json \ "variantsUpdated").as[Int] mustBe 50 + (json \ "relationshipsCreated").as[Int] mustBe 49 + (json \ "relationshipsUpdated").as[Int] mustBe 10 + (json \ "splitOperations").as[Int] mustBe
5 + } + + /* NOTE(review): only the four node counters are asserted; the variant, relationship and split counters of MergeStatistics.empty are not checked here. */ it("should create empty statistics") { + val empty = MergeStatistics.empty + + empty.nodesProcessed mustBe 0 + empty.nodesCreated mustBe 0 + empty.nodesUpdated mustBe 0 + empty.nodesUnchanged mustBe 0 + } + + /* Every expected value is the sum of the arguments at the same position in stats1 and stats2 (e.g. nodesProcessed 10 + 20 = 30, splitOperations 0 + 1 = 1). */ it("should combine statistics correctly") { + val stats1 = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0) + val stats2 = MergeStatistics(20, 10, 6, 4, 40, 10, 9, 2, 1) + + val combined = MergeStatistics.combine(stats1, stats2) + + combined.nodesProcessed mustBe 30 + combined.nodesCreated mustBe 15 + combined.nodesUpdated mustBe 9 + combined.nodesUnchanged mustBe 6 + combined.variantsAdded mustBe 60 + combined.variantsUpdated mustBe 15 + combined.relationshipsCreated mustBe 13 + combined.relationshipsUpdated mustBe 3 + combined.splitOperations mustBe 1 + } + } + + describe("MergeConflict") { + + it("should serialize conflict details") { + val conflict = MergeConflict( + haplogroupName = "R1b-L21", + field = "formedYbp", + existingValue = "4500", + newValue = "4800", + resolution = "updated", + existingSource = "ISOGG", + newSource = "ytree.net" + ) + + val json = Json.toJson(conflict) + + (json \ "haplogroupName").as[String] mustBe "R1b-L21" + (json \ "field").as[String] mustBe "formedYbp" + (json \ "existingValue").as[String] mustBe "4500" + (json \ "newValue").as[String] mustBe "4800" + (json \ "resolution").as[String] mustBe "updated" + (json \ "existingSource").as[String] mustBe "ISOGG" + (json \ "newSource").as[String] mustBe "ytree.net" + } + + it("should round-trip serialize") { + val original = MergeConflict( + haplogroupName = "Test", + field = "description", + existingValue = "old", + newValue = "new", + resolution = "kept_existing", + existingSource = "A", + newSource = "B" + ) + + val restored = Json.toJson(original).as[MergeConflict] + + restored mustBe original + } + } + + describe("SplitOperation") { + + it("should serialize split details") { + val split = SplitOperation( + parentName = "R1b-L21", + newIntermediateName = "R1b-L21a", +
variantsRedistributed = List("V1", "V2"), + childrenReassigned = List("R1b-Z39589", "R1b-Z39590"), + source = "ytree.net" + ) + + val json = Json.toJson(split) + + (json \ "parentName").as[String] mustBe "R1b-L21" + (json \ "newIntermediateName").as[String] mustBe "R1b-L21a" + (json \ "variantsRedistributed").as[List[String]] mustBe List("V1", "V2") + (json \ "childrenReassigned").as[List[String]] mustBe List("R1b-Z39589", "R1b-Z39590") + (json \ "source").as[String] mustBe "ytree.net" + } + } + + describe("TreeMergeResponse") { + + it("should serialize successful response") { + val response = TreeMergeResponse( + success = true, + message = "Merge completed successfully", + statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), + conflicts = List.empty, + splits = List.empty, + errors = List.empty + ) + + val json = Json.toJson(response) + + (json \ "success").as[Boolean] mustBe true + (json \ "message").as[String] mustBe "Merge completed successfully" + (json \ "statistics" \ "nodesProcessed").as[Int] mustBe 10 + (json \ "conflicts").as[List[MergeConflict]] mustBe empty + } + + it("should create failure response") { + val response = TreeMergeResponse.failure( + "Merge failed due to validation error", + List("Error 1", "Error 2") + ) + + response.success mustBe false + response.message mustBe "Merge failed due to validation error" + response.errors mustBe List("Error 1", "Error 2") + response.statistics mustBe MergeStatistics.empty + } + + it("should serialize response with conflicts and errors") { + val response = TreeMergeResponse( + success = false, + message = "Completed with warnings", + statistics = MergeStatistics.empty, + conflicts = List( + MergeConflict("Node1", "field1", "old", "new", "kept", "A", "B") + ), + splits = List.empty, + errors = List("Warning: some nodes skipped") + ) + + val json = Json.toJson(response) + + (json \ "conflicts").as[List[MergeConflict]] must have size 1 + (json \ "errors").as[List[String]] must have size 1 + } + } + +
describe("MergePreviewResponse") { + + it("should serialize preview with all details") { + val response = MergePreviewResponse( + statistics = MergeStatistics(10, 5, 3, 2, 20, 5, 4, 1, 0), + conflicts = List( + MergeConflict("Node1", "formedYbp", "4500", "4800", "will_update", "A", "B") + ), + splits = List.empty, + newNodes = List("NewNode1", "NewNode2"), + updatedNodes = List("UpdatedNode1"), + unchangedNodes = List("UnchangedNode1", "UnchangedNode2") + ) + + val json = Json.toJson(response) + + (json \ "newNodes").as[List[String]] mustBe List("NewNode1", "NewNode2") + (json \ "updatedNodes").as[List[String]] mustBe List("UpdatedNode1") + (json \ "unchangedNodes").as[List[String]] mustBe List("UnchangedNode1", "UnchangedNode2") + (json \ "statistics" \ "nodesCreated").as[Int] mustBe 5 + } + } + + /* NOTE(review): both cases re-check haplogroupType parsing for "Y" and "MT", which the TreeMergeRequest tests above already assert. */ describe("HaplogroupType in requests") { + + it("should accept Y haplogroup type") { + val json = Json.parse("""{ + "haplogroupType": "Y", + "sourceTree": {"name": "R1b"}, + "sourceName": "test" + }""") + + json.validate[TreeMergeRequest] match { + case JsSuccess(request, _) => + request.haplogroupType mustBe HaplogroupType.Y + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should accept MT haplogroup type") { + val json = Json.parse("""{ + "haplogroupType": "MT", + "sourceTree": {"name": "H"}, + "sourceName": "test" + }""") + + json.validate[TreeMergeRequest] match { + case JsSuccess(request, _) => + request.haplogroupType mustBe HaplogroupType.MT + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + } +} diff --git a/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala b/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala new file mode 100644 index 0000000..af88352 --- /dev/null +++ b/test/models/domain/haplogroups/HaplogroupProvenanceSpec.scala @@ -0,0 +1,338 @@ +package models.domain.haplogroups + +import org.scalatest.funspec.AnyFunSpec +import org.scalatest.matchers.must.Matchers +import
play.api.libs.json.Json + +import java.time.LocalDateTime + +/** Unit tests for HaplogroupProvenance: the forNewNode and empty factories, addNodeSource, addVariantSource, merge, withMergeInfo, the shouldPreserveCredit check, Play JSON (de)serialization, and that the update methods leave the original instance unchanged. */ class HaplogroupProvenanceSpec extends AnyFunSpec with Matchers { + + describe("HaplogroupProvenance") { + + describe("factory methods") { + + it("should create provenance for a new node with source") { + val provenance = HaplogroupProvenance.forNewNode("ytree.net", Seq("L21", "S145")) + + provenance.primaryCredit mustBe "ytree.net" + provenance.nodeProvenance mustBe Set("ytree.net") + provenance.variantProvenance mustBe Map( + "L21" -> Set("ytree.net"), + "S145" -> Set("ytree.net") + ) + provenance.lastMergedFrom mustBe Some("ytree.net") + provenance.lastMergedAt mustBe defined + } + + it("should create provenance for a new node without variants") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG") + + provenance.primaryCredit mustBe "ISOGG" + provenance.nodeProvenance mustBe Set("ISOGG") + provenance.variantProvenance mustBe Map.empty + } + + it("should create empty provenance") { + val provenance = HaplogroupProvenance.empty + + provenance.primaryCredit mustBe "" + provenance.nodeProvenance mustBe Set.empty + provenance.variantProvenance mustBe Map.empty + provenance.lastMergedAt mustBe None + provenance.lastMergedFrom mustBe None + } + } + + describe("addNodeSource") { + + it("should add a new source to nodeProvenance") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG") + val updated = provenance.addNodeSource("ytree.net") + + updated.nodeProvenance must contain allOf ("ISOGG", "ytree.net") + updated.primaryCredit mustBe "ISOGG" // Should not change + } + + it("should not duplicate existing sources") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG") + val updated = provenance.addNodeSource("ISOGG") + + updated.nodeProvenance mustBe Set("ISOGG") + } + + it("should accumulate multiple sources") { + val provenance = HaplogroupProvenance.forNewNode("source1") + .addNodeSource("source2") + .addNodeSource("source3") + + provenance.nodeProvenance must have size 3 +
provenance.nodeProvenance must contain allOf ("source1", "source2", "source3") + } + } + + describe("addVariantSource") { + + it("should add source attribution for a new variant") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG") + val updated = provenance.addVariantSource("M269", "ytree.net") + + updated.variantProvenance must contain key "M269" + updated.variantProvenance("M269") must contain("ytree.net") + } + + it("should add additional sources to existing variants") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) + val updated = provenance.addVariantSource("L21", "ytree.net") + + updated.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") + } + + it("should not duplicate sources for the same variant") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) + val updated = provenance.addVariantSource("L21", "ISOGG") + + updated.variantProvenance("L21") mustBe Set("ISOGG") + } + } + + describe("merge") { + + it("should combine nodeProvenance from both records") { + val prov1 = HaplogroupProvenance( + primaryCredit = "ISOGG", + nodeProvenance = Set("ISOGG", "DecodingUs") + ) + val prov2 = HaplogroupProvenance( + primaryCredit = "ytree.net", + nodeProvenance = Set("ytree.net", "researcher") + ) + + val merged = prov1.merge(prov2) + + merged.nodeProvenance must contain allOf ("ISOGG", "DecodingUs", "ytree.net", "researcher") + } + + it("should preserve primary credit from the first provenance") { + val prov1 = HaplogroupProvenance(primaryCredit = "ISOGG") + val prov2 = HaplogroupProvenance(primaryCredit = "ytree.net") + + val merged = prov1.merge(prov2) + + merged.primaryCredit mustBe "ISOGG" + } + + it("should combine variantProvenance") { + val prov1 = HaplogroupProvenance( + primaryCredit = "ISOGG", + variantProvenance = Map("L21" -> Set("ISOGG"), "M269" -> Set("ISOGG")) + ) + val prov2 = HaplogroupProvenance( + primaryCredit = "ytree.net", + variantProvenance = Map("L21" -> Set("ytree.net"),
"DF13" -> Set("ytree.net")) + ) + + val merged = prov1.merge(prov2) + + merged.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") + merged.variantProvenance("M269") mustBe Set("ISOGG") + merged.variantProvenance("DF13") mustBe Set("ytree.net") + } + + /* NOTE(review): only covers the case where the later timestamp is on the argument (prov2); the reverse ordering is not exercised. */ it("should take the most recent lastMergedAt timestamp") { + val earlier = LocalDateTime.now().minusDays(1) + val later = LocalDateTime.now() + + val prov1 = HaplogroupProvenance( + primaryCredit = "A", + lastMergedAt = Some(earlier) + ) + val prov2 = HaplogroupProvenance( + primaryCredit = "B", + lastMergedAt = Some(later) + ) + + val merged = prov1.merge(prov2) + + merged.lastMergedAt mustBe Some(later) + } + + it("should prefer lastMergedFrom from the second provenance") { + val prov1 = HaplogroupProvenance( + primaryCredit = "A", + lastMergedFrom = Some("source1") + ) + val prov2 = HaplogroupProvenance( + primaryCredit = "B", + lastMergedFrom = Some("source2") + ) + + val merged = prov1.merge(prov2) + + merged.lastMergedFrom mustBe Some("source2") + } + + it("should handle merging with empty provenance") { + val prov1 = HaplogroupProvenance.forNewNode("ISOGG", Seq("L21")) + val prov2 = HaplogroupProvenance.empty + + val merged = prov1.merge(prov2) + + merged.primaryCredit mustBe "ISOGG" + merged.nodeProvenance mustBe Set("ISOGG") + merged.variantProvenance mustBe Map("L21" -> Set("ISOGG")) + } + } + + describe("withMergeInfo") { + + it("should update merge timestamp and source") { + val provenance = HaplogroupProvenance.forNewNode("ISOGG") + val now = LocalDateTime.now() + val updated = provenance.withMergeInfo("ytree.net", now) + + updated.lastMergedAt mustBe Some(now) + updated.lastMergedFrom mustBe Some("ytree.net") + updated.primaryCredit mustBe "ISOGG" // Should not change + } + + it("should overwrite previous merge info") { + val earlier = LocalDateTime.now().minusHours(1) + val later = LocalDateTime.now() + + val provenance = HaplogroupProvenance.forNewNode("ISOGG") +
.withMergeInfo("source1", earlier) + .withMergeInfo("source2", later) + + provenance.lastMergedAt mustBe Some(later) + provenance.lastMergedFrom mustBe Some("source2") + } + } + + describe("shouldPreserveCredit") { + + it("should return true for ISOGG credit") { + HaplogroupProvenance.shouldPreserveCredit("ISOGG") mustBe true + } + + it("should be case-insensitive for ISOGG") { + HaplogroupProvenance.shouldPreserveCredit("isogg") mustBe true + HaplogroupProvenance.shouldPreserveCredit("IsoGG") mustBe true + HaplogroupProvenance.shouldPreserveCredit("Isogg") mustBe true + } + + it("should return false for non-ISOGG sources") { + HaplogroupProvenance.shouldPreserveCredit("ytree.net") mustBe false + HaplogroupProvenance.shouldPreserveCredit("DecodingUs") mustBe false + HaplogroupProvenance.shouldPreserveCredit("researcher") mustBe false + } + + it("should return false for empty string") { + HaplogroupProvenance.shouldPreserveCredit("") mustBe false + } + } + + describe("JSON serialization") { + + /* variantProvenance and lastMergedAt are populated on the input but are not asserted on the resulting JSON. */ it("should serialize to JSON correctly") { + val provenance = HaplogroupProvenance( + primaryCredit = "ISOGG", + nodeProvenance = Set("ISOGG", "ytree.net"), + variantProvenance = Map("L21" -> Set("ISOGG", "ytree.net")), + lastMergedAt = Some(LocalDateTime.of(2025, 12, 12, 10, 30, 0)), + lastMergedFrom = Some("ytree.net") + ) + + val json = Json.toJson(provenance) + + (json \ "primaryCredit").as[String] mustBe "ISOGG" + (json \ "nodeProvenance").as[Set[String]] must contain allOf ("ISOGG", "ytree.net") + (json \ "lastMergedFrom").as[String] mustBe "ytree.net" + } + + it("should deserialize from JSON correctly") { + val jsonString = """{ + "primaryCredit": "ISOGG", + "nodeProvenance": ["ISOGG", "ytree.net"], + "variantProvenance": {"L21": ["ISOGG", "ytree.net"]}, + "lastMergedFrom": "ytree.net" + }""" + + val provenance = Json.parse(jsonString).as[HaplogroupProvenance] + + provenance.primaryCredit mustBe "ISOGG" + provenance.nodeProvenance must contain allOf ("ISOGG",
"ytree.net") + provenance.variantProvenance("L21") must contain allOf ("ISOGG", "ytree.net") + provenance.lastMergedFrom mustBe Some("ytree.net") + } + + /* lastMergedAt is not compared after the round trip. */ it("should round-trip serialize and deserialize") { + val original = HaplogroupProvenance.forNewNode("test-source", Seq("V1", "V2")) + + val json = Json.toJson(original) + val restored = json.as[HaplogroupProvenance] + + restored.primaryCredit mustBe original.primaryCredit + restored.nodeProvenance mustBe original.nodeProvenance + restored.variantProvenance mustBe original.variantProvenance + restored.lastMergedFrom mustBe original.lastMergedFrom + } + + it("should handle empty collections in JSON") { + val jsonString = """{ + "primaryCredit": "test", + "nodeProvenance": [], + "variantProvenance": {} + }""" + + val provenance = Json.parse(jsonString).as[HaplogroupProvenance] + + provenance.nodeProvenance mustBe Set.empty + provenance.variantProvenance mustBe Map.empty + } + + it("should handle missing optional fields") { + val jsonString = """{ + "primaryCredit": "test" + }""" + + val provenance = Json.parse(jsonString).as[HaplogroupProvenance] + + provenance.primaryCredit mustBe "test" + provenance.nodeProvenance mustBe Set.empty + provenance.variantProvenance mustBe Map.empty + provenance.lastMergedAt mustBe None + provenance.lastMergedFrom mustBe None + } + } + + describe("immutability") { + + it("should not mutate original when adding node source") { + val original = HaplogroupProvenance.forNewNode("ISOGG") + val modified = original.addNodeSource("ytree.net") + + original.nodeProvenance must not contain "ytree.net" + modified.nodeProvenance must contain("ytree.net") + } + + it("should not mutate original when adding variant source") { + val original = HaplogroupProvenance.forNewNode("ISOGG") + val modified = original.addVariantSource("L21", "ytree.net") + + original.variantProvenance must not contain key ("L21") + modified.variantProvenance must contain key "L21" + } + + it("should not mutate original when
merging") { + val prov1 = HaplogroupProvenance.forNewNode("A") + val prov2 = HaplogroupProvenance.forNewNode("B") + val merged = prov1.merge(prov2) + + prov1.nodeProvenance mustBe Set("A") + prov2.nodeProvenance mustBe Set("B") + merged.nodeProvenance must contain allOf ("A", "B") + } + } + } +} diff --git a/test/services/HaplogroupTreeMergeServiceSpec.scala b/test/services/HaplogroupTreeMergeServiceSpec.scala new file mode 100644 index 0000000..2153a56 --- /dev/null +++ b/test/services/HaplogroupTreeMergeServiceSpec.scala @@ -0,0 +1,702 @@ +package services + +import models.HaplogroupType +import models.api.haplogroups.* +import models.dal.domain.genomics.Variant +import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance} +import org.mockito.ArgumentMatchers.{any, anyInt, anyString} +import org.mockito.Mockito.{never, reset, verify, when} +import org.scalatest.BeforeAndAfterEach +import org.scalatest.concurrent.ScalaFutures +import org.scalatest.time.{Millis, Seconds, Span} +import org.scalatestplus.mockito.MockitoSugar +import org.scalatestplus.play.PlaySpec +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantRepository} + +import java.time.LocalDateTime +import scala.concurrent.{ExecutionContext, Future} + +class HaplogroupTreeMergeServiceSpec extends PlaySpec with MockitoSugar with ScalaFutures with BeforeAndAfterEach { + + implicit val ec: ExecutionContext = ExecutionContext.global + implicit val patience: PatienceConfig = PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis)) + + // Mocks + var mockHaplogroupRepo: HaplogroupCoreRepository = _ + var mockVariantRepo: HaplogroupVariantRepository = _ + var mockVariantRepository: VariantRepository = _ + var service: HaplogroupTreeMergeService = _ + + // Test fixtures + val now: LocalDateTime = LocalDateTime.now() + + def createHaplogroup( + id: Int, + name: String, + haplogroupType: HaplogroupType = HaplogroupType.Y, + source: String = "ISOGG", +
provenance: Option[HaplogroupProvenance] = None + ): Haplogroup = Haplogroup( + id = Some(id), + name = name, + lineage = None, + description = None, + haplogroupType = haplogroupType, + revisionId = 1, + source = source, + confidenceLevel = "high", + validFrom = now.minusDays(30), + validUntil = None, + provenance = provenance + ) + + def createPhyloNode( + name: String, + variants: List[String] = List.empty, + children: List[PhyloNodeInput] = List.empty, + formedYbp: Option[Int] = None + ): PhyloNodeInput = PhyloNodeInput( + name = name, + variants = variants, + children = children, + formedYbp = formedYbp + ) + + override def beforeEach(): Unit = { + mockHaplogroupRepo = mock[HaplogroupCoreRepository] + mockVariantRepo = mock[HaplogroupVariantRepository] + mockVariantRepository = mock[VariantRepository] + service = new HaplogroupTreeMergeService(mockHaplogroupRepo, mockVariantRepo, mockVariantRepository) + } + + "HaplogroupTreeMergeService" should { + + // ========================================================================= + // Preview Tests + // ========================================================================= + + "preview a simple tree merge with no existing haplogroups" in { + // Setup: Empty existing tree + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21", "S145"), + children = List( + createPhyloNode("R1b-DF13", variants = List("DF13")) + ) + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesProcessed mustBe 2 + result.statistics.nodesCreated mustBe 2 + result.statistics.nodesUnchanged mustBe 0 + result.newNodes must contain allOf ("R1b-L21", "R1b-DF13") + result.conflicts mustBe empty + } + } + + "preview identifies existing nodes for 
update" in { + // Setup: Existing tree with R1b-L21 + val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "DecodingUs") + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21", "S145")) + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21", "S145"), + children = List( + createPhyloNode("R1b-DF13", variants = List("DF13")) + ) + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "DecodingUs"))) + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesProcessed mustBe 2 + result.statistics.nodesCreated mustBe 1 // DF13 is new + result.newNodes must contain("R1b-DF13") + // R1b-L21 exists but ytree.net has higher priority, so it might be marked for update + // depending on whether there are differences + } + } + + "preview detects age estimate conflicts" in { + // Setup: Existing tree with different age estimate + val existingHaplogroup = createHaplogroup(1, "R1b-L21").copy(formedYbp = Some(4500)) + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21"), + formedYbp = Some(4800) // Different from existing + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "ISOGG"))) + ) + + whenReady(service.previewMerge(request)) { result => + result.conflicts.size mustBe 1 + result.conflicts.head.field mustBe "formedYbp" + result.conflicts.head.existingValue mustBe "4500" + result.conflicts.head.newValue mustBe "4800" + } + } + + // 
========================================================================= + // Variant-Based Matching Tests + // ========================================================================= + + "match nodes by variants, not names" in { + // Setup: Existing "R-L21" should match incoming "R1b-L21" by variant + val existingHaplogroup = createHaplogroup(1, "R-L21") // Different name + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) // Same variant + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", // Different name but same variant + variants = List("L21") + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.previewMerge(request)) { result => + // Should recognize as existing node (unchanged), not new + result.statistics.nodesCreated mustBe 0 + result.unchangedNodes must contain("R-L21") + } + } + + "fall back to name matching when no variant match found" in { + // Setup: Existing node with same name but no variants + val existingHaplogroup = createHaplogroup(1, "R1b-L21") + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq.empty) // No variants + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21", "S145") // Has variants but no match in DB + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.previewMerge(request)) { result => + // Should match by name + result.statistics.nodesCreated mustBe 0 + result.unchangedNodes must contain("R1b-L21") + } + } + + // ========================================================================= + // Credit Assignment Tests + // ========================================================================= + + "preserve 
ISOGG credit on existing nodes" in { + // Setup: Existing node with ISOGG provenance + val isoggProvenance = HaplogroupProvenance( + primaryCredit = "ISOGG", + nodeProvenance = Set("ISOGG") + ) + val existingHaplogroup = createHaplogroup(1, "R1b-L21", provenance = Some(isoggProvenance)) + + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) + ))) + when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) + .thenReturn(Future.successful(true)) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21") + ) + + val request = TreeMergeRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + dryRun = true // Use dry run for this test + ) + + whenReady(service.mergeFullTree(request)) { result => + result.success mustBe true + // ISOGG credit should be preserved (verified via mock) + } + } + + "assign incoming source credit for new nodes" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "R1b-NEW", + variants = List("NEW123") + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.previewMerge(request)) { result => + result.newNodes must contain("R1b-NEW") + // New nodes get incoming source credit (ytree.net) + } + } + + // ========================================================================= + // Priority Configuration Tests + // ========================================================================= + + "respect source priority for conflict resolution" in { + val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "DecodingUs") + .copy(formedYbp = Some(4500)) + + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + 
(existingHaplogroup, Seq("L21")) + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21"), + formedYbp = Some(4800) + ) + + // Higher priority = lower index. ytree.net at index 0 beats DecodingUs at index 1 + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + priorityConfig = Some(SourcePriorityConfig(List("ytree.net", "DecodingUs"))) + ) + + whenReady(service.previewMerge(request)) { result => + result.conflicts.head.resolution mustBe "will_update" + } + } + + "keep existing values when existing source has higher priority" in { + val existingProvenance = HaplogroupProvenance(primaryCredit = "ISOGG", nodeProvenance = Set("ISOGG")) + val existingHaplogroup = createHaplogroup(1, "R1b-L21", provenance = Some(existingProvenance)) + .copy(formedYbp = Some(4500)) + + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21"), + formedYbp = Some(4800) + ) + + // ISOGG at index 0 beats ytree.net at index 1 + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + priorityConfig = Some(SourcePriorityConfig(List("ISOGG", "ytree.net"))) + ) + + whenReady(service.previewMerge(request)) { result => + result.conflicts.head.resolution mustBe "will_keep_existing" + } + } + + // ========================================================================= + // Subtree Merge Tests + // ========================================================================= + + "merge subtree under specified anchor" in { + val anchorHaplogroup = createHaplogroup(100, "R1b") + + when(mockHaplogroupRepo.getHaplogroupByName("R1b", HaplogroupType.Y)) + .thenReturn(Future.successful(Some(anchorHaplogroup))) + 
when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (anchorHaplogroup, Seq("M269")) + ))) + when(mockHaplogroupRepo.createWithParent(any[Haplogroup], any[Option[Int]], anyString())) + .thenReturn(Future.successful(101)) + when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) + .thenReturn(Future.successful(true)) + when(mockVariantRepository.searchByName(anyString())) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21") + ) + + val request = SubtreeMergeRequest( + haplogroupType = HaplogroupType.Y, + anchorHaplogroupName = "R1b", + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.mergeSubtree(request)) { result => + result.success mustBe true + result.statistics.nodesCreated mustBe 1 + verify(mockHaplogroupRepo).createWithParent(any[Haplogroup], any[Option[Int]], anyString()) + } + } + + "fail subtree merge when anchor not found" in { + when(mockHaplogroupRepo.getHaplogroupByName("NONEXISTENT", HaplogroupType.Y)) + .thenReturn(Future.successful(None)) + + val sourceTree = createPhyloNode(name = "Test") + + val request = SubtreeMergeRequest( + haplogroupType = HaplogroupType.Y, + anchorHaplogroupName = "NONEXISTENT", + sourceTree = sourceTree, + sourceName = "ytree.net" + ) + + whenReady(service.mergeSubtree(request).failed) { ex => + ex mustBe a[IllegalArgumentException] + ex.getMessage must include("not found") + } + } + + // ========================================================================= + // Dry Run Tests + // ========================================================================= + + "not modify database on dry run" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "R1b-NEW", + variants = List("NEW123") + ) + + val request = TreeMergeRequest( + haplogroupType 
= HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + dryRun = true + ) + + whenReady(service.mergeFullTree(request)) { result => + result.success mustBe true + // Verify no write operations were called + verify(mockHaplogroupRepo, never()).createWithParent(any[Haplogroup], any[Option[Int]], anyString()) + verify(mockHaplogroupRepo, never()).update(any[Haplogroup]) + verify(mockHaplogroupRepo, never()).updateProvenance(anyInt(), any[HaplogroupProvenance]) + } + } + + // ========================================================================= + // Recursive Tree Processing Tests + // ========================================================================= + + "process deeply nested tree structures" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + // Create a 4-level deep tree + val deepTree = createPhyloNode( + name = "Level1", + variants = List("V1"), + children = List( + createPhyloNode( + name = "Level2", + variants = List("V2"), + children = List( + createPhyloNode( + name = "Level3", + variants = List("V3"), + children = List( + createPhyloNode("Level4", variants = List("V4")) + ) + ) + ) + ) + ) + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = deepTree, + sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesProcessed mustBe 4 + result.statistics.nodesCreated mustBe 4 + result.newNodes must have size 4 + } + } + + "process tree with multiple children at each level" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val wideTree = createPhyloNode( + name = "Parent", + variants = List("P1"), + children = List( + createPhyloNode("Child1", variants = List("C1")), + createPhyloNode("Child2", variants = List("C2")), + createPhyloNode("Child3", variants = List("C3")) + ) + ) + + val request = 
MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = wideTree, + sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesProcessed mustBe 4 + result.statistics.nodesCreated mustBe 4 + } + } + + // ========================================================================= + // MT DNA Tests + // ========================================================================= + + "handle MT DNA haplogroup type" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.MT)) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "H1", + variants = List("H1-defining") + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.MT, + sourceTree = sourceTree, + sourceName = "mtDNA-tree" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesCreated mustBe 1 + verify(mockHaplogroupRepo).getAllWithVariantNames(HaplogroupType.MT) + } + } + + // ========================================================================= + // Conflict Strategy Tests + // ========================================================================= + + "apply KeepExisting conflict strategy" in { + val existingHaplogroup = createHaplogroup(1, "R1b-L21") + .copy(formedYbp = Some(4500)) + + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) + ))) + when(mockHaplogroupRepo.updateProvenance(anyInt(), any[HaplogroupProvenance])) + .thenReturn(Future.successful(true)) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21"), + formedYbp = Some(4800) + ) + + val request = TreeMergeRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + conflictStrategy = Some(ConflictStrategy.KeepExisting), + dryRun = true + ) + + whenReady(service.mergeFullTree(request)) { result => + result.success mustBe true + 
// With KeepExisting, should not update even with conflicts + result.statistics.nodesUpdated mustBe 0 + } + } + + "apply AlwaysUpdate conflict strategy" in { + val existingHaplogroup = createHaplogroup(1, "R1b-L21", source = "low-priority") + .copy(formedYbp = Some(4500)) + + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("L21")) + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21"), + formedYbp = Some(4800) + ) + + // NOTE(review): no conflict strategy is passed to this preview request, so it exercises the default strategy rather than AlwaysUpdate - confirm intent or rename the test + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "ytree.net", + priorityConfig = Some(SourcePriorityConfig(List("low-priority", "ytree.net"))) // ytree.net is lower priority + ) + + whenReady(service.previewMerge(request)) { result => + // Preview shows conflict would be kept (default strategy) + result.conflicts.nonEmpty mustBe true + } + } + + // ========================================================================= + // Edge Cases + // ========================================================================= + + "handle empty source tree gracefully" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val emptyTree = createPhyloNode(name = "SingleNode") + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = emptyTree, + sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesProcessed mustBe 1 + } + } + + "handle nodes with no variants" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val noVariantsTree = createPhyloNode( + name = "NoVariants", + variants = List.empty + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = noVariantsTree, + 
sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.nodesCreated mustBe 1 + } + } + + "handle case-insensitive variant matching" in { + val existingHaplogroup = createHaplogroup(1, "R1b-L21") + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq( + (existingHaplogroup, Seq("l21")) // lowercase + ))) + + val sourceTree = createPhyloNode( + name = "R1b-L21", + variants = List("L21") // uppercase + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + // Should match despite case difference + result.statistics.nodesCreated mustBe 0 + result.unchangedNodes must contain("R1b-L21") + } + } + + // ========================================================================= + // Statistics Accuracy Tests + // ========================================================================= + + "accurately count variant additions for new nodes" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val sourceTree = createPhyloNode( + name = "Test", + variants = List("V1", "V2", "V3") // 3 variants + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = sourceTree, + sourceName = "test" + ) + + whenReady(service.previewMerge(request)) { result => + result.statistics.variantsAdded mustBe 3 + } + } + + "count relationship creations correctly" in { + when(mockHaplogroupRepo.getAllWithVariantNames(HaplogroupType.Y)) + .thenReturn(Future.successful(Seq.empty)) + + val treeWithChildren = createPhyloNode( + name = "Parent", + children = List( + createPhyloNode("Child1"), + createPhyloNode("Child2") + ) + ) + + val request = MergePreviewRequest( + haplogroupType = HaplogroupType.Y, + sourceTree = treeWithChildren, + sourceName = "test" + ) + + 
whenReady(service.previewMerge(request)) { result => + // Parent has 1 relationship (to anchor or none) + // Child1 and Child2 each have 1 relationship to Parent + result.statistics.relationshipsCreated mustBe 3 + } + } + } +} From 11b7ebb484284af4e84ed634024e9a8bd457490d Mon Sep 17 00:00:00 2001 From: James R Kane Date: Fri, 12 Dec 2025 14:53:00 -0600 Subject: [PATCH 2/6] feat: Add support for variant aliases in tree merge models and service logic - Introduce `VariantInput` model with support for primary names and aliases. - Update `PhyloNodeInput` and related JSON (de)serialization logic to handle variants with aliases. - Extend `HaplogroupTreeMergeService` to handle alias associations and provenance updates. - Add new repository (`VariantAliasRepository`) for managing variant aliases. - Enhance unit tests for variant aliases in tree merge functionality. --- .../api/haplogroups/TreeMergeModels.scala | 16 +++- app/services/HaplogroupTreeMergeService.scala | 82 ++++++++++++++----- .../api/haplogroups/TreeMergeModelsSpec.scala | 72 +++++++++++++--- .../HaplogroupTreeMergeServiceSpec.scala | 13 ++- 4 files changed, 148 insertions(+), 35 deletions(-) diff --git a/app/models/api/haplogroups/TreeMergeModels.scala b/app/models/api/haplogroups/TreeMergeModels.scala index 8b98eea..1c09bd3 100644 --- a/app/models/api/haplogroups/TreeMergeModels.scala +++ b/app/models/api/haplogroups/TreeMergeModels.scala @@ -14,13 +14,27 @@ import play.api.libs.json.{Format, Json, OFormat, Reads, Writes} // Input Tree Structure // ============================================================================ +/** + * A variant with its primary name and optional aliases. + * Aliases represent alternative names for the same SNP from different labs/sources. 
+ * Example: M207 (primary) with aliases Page37, UTY2 + */ +case class VariantInput( + name: String, + aliases: List[String] = List.empty +) + +object VariantInput { + implicit val format: OFormat[VariantInput] = Json.format[VariantInput] +} + /** * A node in the input phylogenetic tree for merging. * Matching is done by variants, not names, to handle different naming conventions. */ case class PhyloNodeInput( name: String, - variants: List[String] = List.empty, + variants: List[VariantInput] = List.empty, formedYbp: Option[Int] = None, formedYbpLower: Option[Int] = None, formedYbpUpper: Option[Int] = None, diff --git a/app/services/HaplogroupTreeMergeService.scala b/app/services/HaplogroupTreeMergeService.scala index f3a6f93..955c9d6 100644 --- a/app/services/HaplogroupTreeMergeService.scala +++ b/app/services/HaplogroupTreeMergeService.scala @@ -3,9 +3,10 @@ package services import jakarta.inject.{Inject, Singleton} import models.HaplogroupType import models.api.haplogroups.* +import models.dal.domain.genomics.VariantAlias import models.domain.haplogroups.{Haplogroup, HaplogroupProvenance} import play.api.Logging -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantRepository} +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} import java.time.LocalDateTime import scala.concurrent.{ExecutionContext, Future} @@ -25,9 +26,30 @@ import scala.concurrent.{ExecutionContext, Future} class HaplogroupTreeMergeService @Inject()( haplogroupRepository: HaplogroupCoreRepository, haplogroupVariantRepository: HaplogroupVariantRepository, - variantRepository: VariantRepository + variantRepository: VariantRepository, + variantAliasRepository: VariantAliasRepository )(implicit ec: ExecutionContext) extends Logging { + // ============================================================================ + // Helper methods for VariantInput + // 
============================================================================ + + /** Extract all variant names (primary + aliases) from a VariantInput */ + private def allVariantNames(variant: VariantInput): List[String] = + variant.name :: variant.aliases + + /** Extract all variant names from a list of VariantInput */ + private def allVariantNames(variants: List[VariantInput]): List[String] = + variants.flatMap(allVariantNames) + + /** Extract just the primary variant names from a list of VariantInput */ + private def primaryVariantNames(variants: List[VariantInput]): List[String] = + variants.map(_.name) + + // ============================================================================ + // Public API + // ============================================================================ + /** * Merge a full tree, replacing the existing tree for the given haplogroup type. */ @@ -204,11 +226,12 @@ class HaplogroupTreeMergeService @Inject()( /** * Find an existing haplogroup that matches the input node. - * Primary matching is by variants; fallback is by name. + * Primary matching is by variants (including aliases); fallback is by name. 
*/ private def findExistingMatch(node: PhyloNodeInput, index: VariantIndex): Option[Haplogroup] = { - // First try variant-based matching - val variantMatches = node.variants + // First try variant-based matching - check primary names and all aliases + val allNames = allVariantNames(node.variants) + val variantMatches = allNames .flatMap(v => index.variantToHaplogroup.getOrElse(v.toUpperCase, Seq.empty)) .groupBy(identity) .view.mapValues(_.size) @@ -311,7 +334,8 @@ class HaplogroupTreeMergeService @Inject()( ): Future[MergeAccumulator] = { // Determine credit - incoming source gets credit for new nodes val primaryCredit = context.sourceName - val provenance = HaplogroupProvenance.forNewNode(context.sourceName, node.variants) + val variantNames = primaryVariantNames(node.variants) + val provenance = HaplogroupProvenance.forNewNode(context.sourceName, variantNames) val newHaplogroup = Haplogroup( id = None, @@ -352,10 +376,11 @@ class HaplogroupTreeMergeService @Inject()( accumulator.statistics.relationshipsCreated ) - // Update index with new haplogroup + // Update index with new haplogroup - include all variant names (primary + aliases) for matching + allVarNames = allVariantNames(node.variants) updatedIndex = index.copy( haplogroupByName = index.haplogroupByName + (node.name.toUpperCase -> newHaplogroup.copy(id = Some(newId))), - variantToHaplogroup = node.variants.foldLeft(index.variantToHaplogroup) { (idx, v) => + variantToHaplogroup = allVarNames.foldLeft(index.variantToHaplogroup) { (idx, v) => idx.updatedWith(v.toUpperCase) { case Some(hgs) => Some(hgs :+ newHaplogroup.copy(id = Some(newId))) case None => Some(Seq(newHaplogroup.copy(id = Some(newId)))) @@ -396,7 +421,7 @@ class HaplogroupTreeMergeService @Inject()( */ private def updateProvenance( existing: Haplogroup, - newVariants: List[String], + newVariants: List[VariantInput], context: MergeContext ): Future[Boolean] = { val existingProvenance = existing.provenance.getOrElse( @@ -413,8 +438,9 @@ class 
HaplogroupTreeMergeService @Inject()( // Add new source to node provenance val updatedNodeProv = existingProvenance.nodeProvenance + context.sourceName - // Add variant provenance for new variants - val updatedVariantProv = newVariants.foldLeft(existingProvenance.variantProvenance) { (prov, variant) => + // Add variant provenance for new variants (primary names only for provenance tracking) + val variantNames = primaryVariantNames(newVariants) + val updatedVariantProv = variantNames.foldLeft(existingProvenance.variantProvenance) { (prov, variant) => prov.updatedWith(variant) { case Some(sources) => Some(sources + context.sourceName) case None => Some(Set(context.sourceName)) @@ -460,21 +486,39 @@ class HaplogroupTreeMergeService @Inject()( /** * Associate variants with a haplogroup, finding or creating variants as needed. */ - private def associateVariants(haplogroupId: Int, variantNames: List[String]): Future[Int] = { - if (variantNames.isEmpty) { + private def associateVariants(haplogroupId: Int, variants: List[VariantInput]): Future[Int] = { + if (variants.isEmpty) { Future.successful(0) } else { - // For each variant name, find existing variants by name and associate them - Future.traverse(variantNames) { variantName => - variantRepository.searchByName(variantName).flatMap { variants => + // For each variant, find existing variants by primary name and associate them, + // then create alias records for any aliases + Future.traverse(variants) { variantInput => + // First find/associate the primary variant + variantRepository.searchByName(variantInput.name).flatMap { foundVariants => // Associate all found variants with this haplogroup - Future.traverse(variants) { variant => + val associateFutures = foundVariants.map { variant => variant.variantId match { - case Some(vid) => haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, vid) + case Some(vid) => + for { + // Associate variant with haplogroup + count <- 
haplogroupVariantRepository.addVariantToHaplogroup(haplogroupId, vid) + // Create alias records for any aliases. NOTE(review): the alias source below is hard-coded to "ISOGG" although the specs merge with other source names (e.g. "ytree.net") - confirm whether the merge's source name should be recorded instead + _ <- Future.traverse(variantInput.aliases) { alias => + val variantAlias = VariantAlias( + variantId = vid, + aliasType = "common_name", + aliasValue = alias, + source = Some("ISOGG"), + isPrimary = false + ) + variantAliasRepository.addAlias(variantAlias).recover { case _ => false } + } + } yield count case None => Future.successful(0) } } - }.map(_.sum) + Future.sequence(associateFutures).map(_.sum) + } }.map(_.sum) } } diff --git a/test/models/api/haplogroups/TreeMergeModelsSpec.scala b/test/models/api/haplogroups/TreeMergeModelsSpec.scala index 80261a4..4e77597 100644 --- a/test/models/api/haplogroups/TreeMergeModelsSpec.scala +++ b/test/models/api/haplogroups/TreeMergeModelsSpec.scala @@ -7,29 +7,77 @@ import play.api.libs.json.{JsError, JsSuccess, Json} class TreeMergeModelsSpec extends AnyFunSpec with Matchers { + describe("VariantInput") { + + describe("JSON serialization") { + + it("should deserialize a simple variant") { + val json = Json.parse("""{"name": "M207"}""") + json.validate[VariantInput] match { + case JsSuccess(v, _) => + v.name mustBe "M207" + v.aliases mustBe List.empty + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should deserialize a variant with aliases") { + val json = Json.parse("""{"name": "M207", "aliases": ["Page37", "UTY2"]}""") + json.validate[VariantInput] match { + case JsSuccess(v, _) => + v.name mustBe "M207" + v.aliases mustBe List("Page37", "UTY2") + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + + it("should serialize to JSON") { + val variant = VariantInput("M207", List("Page37", "UTY2")) + val json = Json.toJson(variant) + (json \ "name").as[String] mustBe "M207" + (json \ "aliases").as[List[String]] mustBe List("Page37", "UTY2") + } + } + } + describe("PhyloNodeInput") { describe("JSON serialization") { - it("should deserialize a simple node") { + 
it("should deserialize a simple node with variant objects") { val json = Json.parse("""{ "name": "R1b-L21", - "variants": ["L21", "S145"] + "variants": [{"name": "L21"}, {"name": "S145"}] }""") json.validate[PhyloNodeInput] match { case JsSuccess(node, _) => node.name mustBe "R1b-L21" - node.variants mustBe List("L21", "S145") + node.variants.map(_.name) mustBe List("L21", "S145") node.children mustBe List.empty case JsError(errors) => fail(s"Parse failed: $errors") } } + it("should deserialize a node with variant aliases") { + val json = Json.parse("""{ + "name": "R", + "variants": [{"name": "M207", "aliases": ["Page37", "UTY2"]}] + }""") + + json.validate[PhyloNodeInput] match { + case JsSuccess(node, _) => + node.variants must have size 1 + node.variants.head.name mustBe "M207" + node.variants.head.aliases mustBe List("Page37", "UTY2") + case JsError(errors) => fail(s"Parse failed: $errors") + } + } + it("should deserialize node with all age fields") { val json = Json.parse("""{ "name": "R1b-L21", - "variants": ["L21"], + "variants": [{"name": "L21"}], "formedYbp": 4500, "formedYbpLower": 4200, "formedYbpUpper": 4800, @@ -53,15 +101,15 @@ class TreeMergeModelsSpec extends AnyFunSpec with Matchers { it("should deserialize nested children") { val json = Json.parse("""{ "name": "R1b-L21", - "variants": ["L21"], + "variants": [{"name": "L21"}], "children": [ { "name": "R1b-DF13", - "variants": ["DF13"], + "variants": [{"name": "DF13"}], "children": [ { "name": "R1b-Z39589", - "variants": ["Z39589"] + "variants": [{"name": "Z39589"}] } ] } @@ -82,17 +130,17 @@ class TreeMergeModelsSpec extends AnyFunSpec with Matchers { it("should serialize to JSON") { val node = PhyloNodeInput( name = "R1b-L21", - variants = List("L21", "S145"), + variants = List(VariantInput("L21"), VariantInput("S145")), formedYbp = Some(4500), children = List( - PhyloNodeInput(name = "R1b-DF13", variants = List("DF13")) + PhyloNodeInput(name = "R1b-DF13", variants = List(VariantInput("DF13"))) ) 
) val json = Json.toJson(node) (json \ "name").as[String] mustBe "R1b-L21" - (json \ "variants").as[List[String]] mustBe List("L21", "S145") + (json \ "variants").as[List[VariantInput]].map(_.name) mustBe List("L21", "S145") (json \ "formedYbp").as[Int] mustBe 4500 (json \ "children").as[List[PhyloNodeInput]] must have size 1 } @@ -194,7 +242,7 @@ class TreeMergeModelsSpec extends AnyFunSpec with Matchers { "haplogroupType": "Y", "sourceTree": { "name": "R1b", - "variants": ["M269"] + "variants": [{"name": "M269"}] }, "sourceName": "ytree.net", "priorityConfig": { @@ -255,7 +303,7 @@ class TreeMergeModelsSpec extends AnyFunSpec with Matchers { "anchorHaplogroupName": "R1b", "sourceTree": { "name": "R1b-L21", - "variants": ["L21"] + "variants": [{"name": "L21"}] }, "sourceName": "ytree.net", "dryRun": false diff --git a/test/services/HaplogroupTreeMergeServiceSpec.scala b/test/services/HaplogroupTreeMergeServiceSpec.scala index 2153a56..75291b9 100644 --- a/test/services/HaplogroupTreeMergeServiceSpec.scala +++ b/test/services/HaplogroupTreeMergeServiceSpec.scala @@ -11,7 +11,7 @@ import org.scalatest.concurrent.ScalaFutures import org.scalatest.time.{Millis, Seconds, Span} import org.scalatestplus.mockito.MockitoSugar import org.scalatestplus.play.PlaySpec -import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantRepository} +import repositories.{HaplogroupCoreRepository, HaplogroupVariantRepository, VariantAliasRepository, VariantRepository} import java.time.LocalDateTime import scala.concurrent.{ExecutionContext, Future} @@ -25,6 +25,7 @@ class HaplogroupTreeMergeServiceSpec extends PlaySpec with MockitoSugar with Sca var mockHaplogroupRepo: HaplogroupCoreRepository = _ var mockVariantRepo: HaplogroupVariantRepository = _ var mockVariantRepository: VariantRepository = _ + var mockVariantAliasRepository: VariantAliasRepository = _ var service: HaplogroupTreeMergeService = _ // Test fixtures @@ -57,7 +58,7 @@ class 
HaplogroupTreeMergeServiceSpec extends PlaySpec with MockitoSugar with Sca formedYbp: Option[Int] = None ): PhyloNodeInput = PhyloNodeInput( name = name, - variants = variants, + variants = variants.map(v => VariantInput(v)), // Convert strings to VariantInput children = children, formedYbp = formedYbp ) @@ -66,7 +67,13 @@ class HaplogroupTreeMergeServiceSpec extends PlaySpec with MockitoSugar with Sca mockHaplogroupRepo = mock[HaplogroupCoreRepository] mockVariantRepo = mock[HaplogroupVariantRepository] mockVariantRepository = mock[VariantRepository] - service = new HaplogroupTreeMergeService(mockHaplogroupRepo, mockVariantRepo, mockVariantRepository) + mockVariantAliasRepository = mock[VariantAliasRepository] + service = new HaplogroupTreeMergeService( + mockHaplogroupRepo, + mockVariantRepo, + mockVariantRepository, + mockVariantAliasRepository + ) } "HaplogroupTreeMergeService" should { From 307268e4f2ef3d2eb1852eb938c8f78ec0d01546 Mon Sep 17 00:00:00 2001 From: jkane Date: Fri, 12 Dec 2025 15:52:07 -0600 Subject: [PATCH 3/6] **refactor(repositories): Improve query efficiency and simplify JSONB handling** - Updated `addVariantToHaplogroup` to use `ON CONFLICT DO NOTHING` for conflict-free inserts. - Enhanced JSONB mappers to handle `null` uniformly and streamlined `HaplogroupProvenance` mapping logic. 
--- app/models/dal/MyPostgresProfile.scala | 26 +++++++------------ .../HaplogroupVariantRepository.scala | 9 ++++--- conf/application.conf | 3 +++ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/app/models/dal/MyPostgresProfile.scala b/app/models/dal/MyPostgresProfile.scala index 7112406..36b675c 100644 --- a/app/models/dal/MyPostgresProfile.scala +++ b/app/models/dal/MyPostgresProfile.scala @@ -276,7 +276,8 @@ trait MyPostgresProfile extends ExPostgresProfile case None => JsNull }, { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None + // Handle database NULL (Java null), JSON null, or empty object + if (jsValue == null || jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None else Some(jsValue.as[IdentityVerification]) } ) @@ -290,7 +291,8 @@ trait MyPostgresProfile extends ExPostgresProfile case None => JsNull }, { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None + // Handle database NULL (Java null), JSON null, or empty object + if (jsValue == null || jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None else Some(jsValue.as[ManualOverride]) } ) @@ -304,28 +306,20 @@ trait MyPostgresProfile extends ExPostgresProfile case None => JsNull }, { jsValue => - if (jsValue == JsNull) None + // Handle database NULL (Java null) or JSON null + if (jsValue == null || jsValue == JsNull) None else Some(jsValue.as[Seq[AuditEntry]]) } ) } // --- Haplogroup Provenance JSONB Type Mapper --- + // Maps HaplogroupProvenance directly to JsValue. For nullable columns, use column[Option[HaplogroupProvenance]] + // and Slick will handle NULL automatically. 
import models.domain.haplogroups.HaplogroupProvenance - implicit val haplogroupProvenanceJsonbTypeMapper: JdbcType[Option[HaplogroupProvenance]] with BaseTypedType[Option[HaplogroupProvenance]] = { - import play.api.libs.json.{JsNull, JsObject} - MappedJdbcType.base[Option[HaplogroupProvenance], JsValue]( - { - case Some(prov) => Json.toJson(prov) - case None => JsNull - }, - { jsValue => - if (jsValue == JsNull || (jsValue.isInstanceOf[JsObject] && jsValue.as[JsObject].value.isEmpty)) None - else Some(jsValue.as[HaplogroupProvenance]) - } - ) - } + implicit val haplogroupProvenanceJsonbTypeMapper: JdbcType[HaplogroupProvenance] with BaseTypedType[HaplogroupProvenance] = + MappedJdbcType.base[HaplogroupProvenance, JsValue](Json.toJson(_), _.as[HaplogroupProvenance]) // Declare the name of an aggregate function: val ArrayAgg = new SqlAggregateFunction("array_agg") diff --git a/app/repositories/HaplogroupVariantRepository.scala b/app/repositories/HaplogroupVariantRepository.scala index 66477a4..68368ee 100644 --- a/app/repositories/HaplogroupVariantRepository.scala +++ b/app/repositories/HaplogroupVariantRepository.scala @@ -174,9 +174,12 @@ class HaplogroupVariantRepositoryImpl @Inject()( } override def addVariantToHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] = { - val insertion = (haplogroupVariants returning haplogroupVariants.map(_.haplogroupVariantId)) += - HaplogroupVariant(None, haplogroupId, variantId) - runQuery(insertion) + val insertAction = sqlu""" + INSERT INTO haplogroup_variant (haplogroup_id, variant_id) + VALUES ($haplogroupId, $variantId) + ON CONFLICT (haplogroup_id, variant_id) DO NOTHING + """ + runQuery(insertAction) } def removeVariantFromHaplogroup(haplogroupId: Int, variantId: Int): Future[Int] = { diff --git a/conf/application.conf b/conf/application.conf index b249d7e..19cdd0d 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -10,6 +10,9 @@ play.i18n { # No need to create cookies in a read-only application. 
Remove when appropriate play.http.session.disabled=true +# Increase max request body size for tree merge API (default is 100KB) +play.http.parser.maxMemoryBuffer = 10MB + # Disable the startup banner play.application.showBanner=false From 213eb450aafc332bb66998ef05641a9cf8993e6f Mon Sep 17 00:00:00 2001 From: James R Kane Date: Fri, 12 Dec 2025 16:42:22 -0600 Subject: [PATCH 4/6] feat: Update haplogroup views to display detailed provenance information - Replace "Confidence" column with "Provenance" in haplogroup list view. - Add badges for primary credit and contributors in list and detail views. - Extend detail panel with comprehensive provenance details, including contributors, last merge info, and variant sources. --- .../haplogroups/detailPanel.scala.html | 66 +++++++++++++++++++ .../haplogroups/listFragment.scala.html | 16 ++++- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/app/views/curator/haplogroups/detailPanel.scala.html b/app/views/curator/haplogroups/detailPanel.scala.html index 3c0d0e0..e04f678 100644 --- a/app/views/curator/haplogroups/detailPanel.scala.html +++ b/app/views/curator/haplogroups/detailPanel.scala.html @@ -48,6 +48,72 @@
@haplogroup.name
} + @haplogroup.provenance.map { prov => +
+
Provenance
+
+
Primary Credit
+
+ + @prov.primaryCredit + +
+ + @if(prov.nodeProvenance.nonEmpty) { +
Contributors
+
+ @for(source <- prov.nodeProvenance.toSeq.sorted) { + @source + } +
+ } + + @prov.lastMergedFrom.map { source => +
Last Merged
+
+ + from @source + @prov.lastMergedAt.map { ts => + on @ts.toLocalDate + } + +
+ } + + @if(prov.variantProvenance.nonEmpty) { +
Variant Sources
+
+ +
+
    + @for((variant, sources) <- prov.variantProvenance.toSeq.sortBy(_._1).take(20)) { +
  • + @variant + + @for(src <- sources.toSeq.sorted) { + @src + } + +
  • + } + @if(prov.variantProvenance.size > 20) { +
  • + +@(prov.variantProvenance.size - 20) more variants... +
  • + } +
+
+
+ } +
+ } +
Tree Position
diff --git a/app/views/curator/haplogroups/listFragment.scala.html b/app/views/curator/haplogroups/listFragment.scala.html index 980cb90..a5f18be 100644 --- a/app/views/curator/haplogroups/listFragment.scala.html +++ b/app/views/curator/haplogroups/listFragment.scala.html @@ -11,7 +11,7 @@ Name Type Source - Confidence + Provenance @@ -29,7 +29,19 @@ @hg.source - @hg.confidenceLevel + + @hg.provenance.map { prov => + + @prov.primaryCredit + @if(prov.nodeProvenance.size > 1) { + +@(prov.nodeProvenance.size - 1) + } + + }.getOrElse { + - + } + Date: Sat, 13 Dec 2025 03:22:50 -0600 Subject: [PATCH 5/6] **feat(haplogroups): Add provenance rendering to SNP detail sidebar** - Introduced `findHaplogroupWithVariants` in `HaplogroupTreeService` to retrieve haplogroup details with associated variants and provenance. - Updated `TreeController` to use the new method for populating additional provenance data in the SNP detail sidebar. - Enhanced `snpDetailSidebar.scala.html` to display haplogroup provenance along with styling updates. - Added utility method `formatSourceName` for consistent provenance source formatting. 
--- app/controllers/TreeController.scala | 6 +- app/services/HaplogroupTreeService.scala | 16 +++ .../fragments/snpDetailSidebar.scala.html | 98 ++++++++++++++++++- 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/app/controllers/TreeController.scala b/app/controllers/TreeController.scala index e175c83..1de184b 100644 --- a/app/controllers/TreeController.scala +++ b/app/controllers/TreeController.scala @@ -4,6 +4,7 @@ import config.FeatureFlags import models.HaplogroupType import models.HaplogroupType.{MT, Y} import models.api.{SubcladeDTO, TreeNodeDTO} +import models.domain.haplogroups.HaplogroupProvenance import models.view.TreeViewModel import org.webjars.play.WebJarsUtil import play.api.cache.{AsyncCacheApi, Cached} @@ -272,8 +273,9 @@ class TreeController @Inject()(val controllerComponents: MessagesControllerCompo } def getSnpDetailSidebar(haplogroupName: String, haplogroupType: HaplogroupType): Action[AnyContent] = Action.async { implicit request => - treeService.findVariantsForHaplogroup(haplogroupName, haplogroupType).map { snps => - Ok(views.html.fragments.snpDetailSidebar(haplogroupName, snps)) + treeService.findHaplogroupWithVariants(haplogroupName, haplogroupType).map { case (haplogroup, snps) => + val provenance = haplogroup.flatMap(_.provenance) + Ok(views.html.fragments.snpDetailSidebar(haplogroupName, snps, provenance)) } } diff --git a/app/services/HaplogroupTreeService.scala b/app/services/HaplogroupTreeService.scala index 5350296..fe49965 100644 --- a/app/services/HaplogroupTreeService.scala +++ b/app/services/HaplogroupTreeService.scala @@ -259,6 +259,22 @@ class HaplogroupTreeService @Inject()( } yield treeLists.flatten } + /** + * Finds and retrieves haplogroup details with all associated genomic variants. + * + * This method fetches the haplogroup (including provenance) and its linked variants. + * + * @param haplogroupName The name of the haplogroup for which details are to be retrieved. 
+ * @param haplogroupType The type of haplogroup (e.g., Y-DNA or mtDNA). + * @return A Future containing a tuple of (Option[Haplogroup], Seq[VariantDTO]); it fails if either lookup fails. + */ + def findHaplogroupWithVariants(haplogroupName: String, haplogroupType: HaplogroupType): Future[(Option[Haplogroup], Seq[VariantDTO])] = { + // Neither lookup needs the other's result, so start both before combining them. + val haplogroupLookup = coreRepository.getHaplogroupByName(haplogroupName, haplogroupType) + val variantLookup = findVariantsForHaplogroup(haplogroupName, haplogroupType) + haplogroupLookup.zip(variantLookup) + } + /** * Finds and retrieves all genomic variants associated with a specified haplogroup. * diff --git a/app/views/fragments/snpDetailSidebar.scala.html b/app/views/fragments/snpDetailSidebar.scala.html index 4bdb0b3..cc9f9f2 100644 --- a/app/views/fragments/snpDetailSidebar.scala.html +++ b/app/views/fragments/snpDetailSidebar.scala.html @@ -1,7 +1,8 @@ @import controllers.routes.TreeController @import models.api.VariantDTO +@import models.domain.haplogroups.HaplogroupProvenance -@(haplogroupName: String, snps: Seq[VariantDTO])(implicit messages: Messages) +@(haplogroupName: String, snps: Seq[VariantDTO], provenance: Option[HaplogroupProvenance] = None)(implicit messages: Messages)