Slow mongo queries
See original GitHub issue
Several mongo queries are really slow, which can occasionally cause timeouts for users.
The general issue is that many queries use $and/$or statements, which require mongo to evaluate all cases/variants. Some views, such as the variants list view, require multiple db queries. This causes several issues since our database contains 2,947 cases and 15,469,895 variants. Most queries take about 200-300ms, with some reaching towards 2000ms.
Solution ideas
I do think we need to transition away from aggregations and filters and instead store references to the variants in the case object.
Examples of slow queries
Case view
{
"t": {
"$date": "2021-10-04T11:15:54.678+00:00"
},
"s": "I",
"c": "COMMAND",
"id": 51803,
"ctx": "conn436",
"msg": "Slow query",
"attr": {
"type": "command",
"ns": "scout.variant",
"command": {
"find": "variant",
"filter": {
"$and": [
{
"case_id": "XXX-21"
},
{
"$or": [
{
"acmg_classification": {
"$exists": true
}
},
{
"manual_rank": {
"$exists": true
}
},
{
"cancer_tier": {
"$exists": true
}
},
{
"dismiss_variant": {
"$exists": true
}
},
{
"mosaic_tags": {
"$exists": true
}
}
]
}
]
},
"lsid": {
"id": {
"$uuid": "1221e638-6091-4660-9d62-4b37327a875f"
}
},
"$db": "scout",
"$readPreference": {
"mode": "primaryPreferred"
}
},
"planSummary": "IXSCAN { case_id: 1, category: 1, rank_score: -1 }",
"keysExamined": 12013,
"docsExamined": 12013,
"fromMultiPlanner": true,
"replanned": true,
"replanReason": "cached plan was less efficient than expected: expected trial execution to take 135 works but it took at least 1350 works",
"cursorExhausted": true,
"numYields": 112,
"nreturned": 0,
"queryHash": "88035F71",
"planCacheKey": "786184B2",
"reslen": 102,
"locks": {
"Global": {
"acquireCount": {
"r": 113
}
},
"Mutex": {
"acquireCount": {
"r": 1
}
}
},
"storage": {
"data": {
"bytesRead": 172780996,
"timeReadingMicros": 1534602
}
},
"remote": "172.22.0.7:39600",
"protocol": "op_msg",
"durationMillis": 1987
}
}
Variants list view
{
"t": {
"$date": "2021-10-04T11:16:40.893+00:00"
},
"s": "I",
"c": "COMMAND",
"id": 51803,
"ctx": "conn432",
"msg": "Slow query",
"attr": {
"type": "command",
"ns": "scout.variant",
"command": {
"aggregate": "variant",
"pipeline": [
{
"$match": {
"case_id": "xxxx-21",
"category": "snv",
"variant_type": "clinical",
"hgnc_ids": {
"$in": [
11277,
11278,
30224,
3602,
20,
23573,
15911,
12840,
4137,
25135,
7224,
7225,
7737,
7739,
9788,
8768,
1090,
4162,
6215,
3148,
1618,
2135,
15968,
8806,
15974,
8811,
5228,
25198,
15984,
12405,
5246,
7808,
644,
2711,
7323,
8860,
26784,
10914,
18083,
53924,
3239,
28845,
13997,
6323,
19125,
25786,
4283,
15559,
4296,
14540,
4816,
11474,
2277,
8940,
23791,
9969,
6898,
29427,
9462,
20731,
16636,
14103,
10519,
7450,
799,
6946,
10542,
4912,
10548,
10549,
17208,
4922,
10555,
24891,
16187,
5959,
8011,
10583,
8031,
869,
10597,
25964,
3951,
888,
12666,
2961,
2974,
9118,
2976,
5542,
939,
12718,
29105,
445,
7106,
2498,
16841,
24526,
15832,
11226,
30171,
9179,
8157,
11231,
13797,
16873,
6636,
16877,
11758,
23537,
7154,
15868,
7679
]
}
}
},
{
"$group": {
"_id": 1,
"n": {
"$sum": 1
}
}
}
],
"cursor": {},
"lsid": {
"id": {
"$uuid": "f453cbea-583d-4802-9b33-ea51adf578bc"
}
},
"$db": "scout",
"$readPreference": {
"mode": "primaryPreferred"
}
},
"planSummary": "IXSCAN { case_id: 1, category: 1, rank_score: -1 }",
"keysExamined": 10722,
"docsExamined": 10722,
"fromMultiPlanner": true,
"cursorExhausted": true,
"numYields": 54,
"nreturned": 1,
"queryHash": "D2C3975A",
"planCacheKey": "42566B0C",
"reslen": 126,
"locks": {
"Global": {
"acquireCount": {
"r": 56
}
},
"Mutex": {
"acquireCount": {
"r": 2
}
}
},
"storage": {
"data": {
"bytesRead": 70685126,
"timeReadingMicros": 62137
}
},
"remote": "172.22.0.7:39302",
"protocol": "op_msg",
"durationMillis": 349
}
}
{
"t": {
"$date": "2021-10-04T11:16:42.026+00:00"
},
"s": "I",
"c": "COMMAND",
"id": 51803,
"ctx": "conn432",
"msg": "Slow query",
"attr": {
"type": "command",
"ns": "scout.variant",
"command": {
"findAndModify": "variant",
"query": {
"_id": "3945d3b1e5e79fdff83a10cb4fd826da"
},
"new": true,
"update": {
"_id": "3945d3b1e5e79fdff83a10cb4fd826da",
"document_id": "3945d3b1e5e79fdff83a10cb4fd826da",
"variant_id": "a35bf1b581c51745e82c3ee6e780ac4e",
"display_name": "X_130149545_A_G_clinical",
"variant_type": "clinical",
"case_id": "xxxx-21",
"chromosome": "X",
"reference": "A",
"alternative": "G",
"institute": "klingen_38",
"missing_data": false,
"position": 130149545,
"rank_score": 4,
"end": 130149545,
"length": 1,
"simple_id": "X_130149545_A_G",
"quality": 540.7999877929688,
"filters": [
"PASS"
],
"dbsnp_id": "rs1139851",
"cosmic_ids": [
"COSV54852615"
],
"category": "snv",
"sub_category": "snv",
"mate_id": null,
"cytoband_start": "q26.1",
"cytoband_end": "q26.1",
"end_chrom": "X",
"samples": [
{
"sample_id": "9632-21",
"display_name": "9632-21",
"genotype_call": "1/1",
"allele_depths": [
0,
22
],
"read_depth": 22,
"alt_frequency": 1,
"genotype_quality": 66,
"so": null
}
],
"genetic_models": [
"XD",
"XR"
],
"genes": [
{
"hgnc_id": 8768,
"hgnc_symbol": "AIFM1",
"ensembl_id": "ENSG00000156709",
"description": "apoptosis inducing factor mitochondria associated 1",
"inheritance": [
"XR"
],
"phenotypes": [
{
"mim_number": 310490,
"description": "Cowchock syndrome",
"inheritance_models": [
"XR"
],
"status": "established"
},
{
"mim_number": 300232,
"description": "Spondyloepimetaphyseal dysplasia X-linked with hypomyelinating leukodystrophy",
"inheritance_models": [
"XR"
],
"status": "established"
},
{
"mim_number": 300816,
"description": "Combined oxidative phosphorylation deficiency 6",
"inheritance_models": [
"XR"
],
"status": "established"
},
{
"mim_number": 300614,
"description": "Deafness X-linked 5",
"inheritance_models": [
"XR"
],
"status": "established"
}
],
"transcripts": [
{
"transcript_id": "ENST00000287295",
"hgnc_id": 8768,
"protein_id": "ENSP00000287295",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "O95831.210",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"mane_select_transcript": "NM_004208.4",
"is_canonical": false
},
{
"transcript_id": "ENST00000319908",
"hgnc_id": 8768,
"protein_id": "ENSP00000315122",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "O95831.210",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000346424",
"hgnc_id": 8768,
"protein_id": "ENSP00000316320",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "O95831.210",
"biotype": "protein_coding",
"functional_annotations": [
"intron_variant"
],
"region_annotations": [
"intronic"
],
"intron": "1/7",
"strand": "-",
"coding_sequence_name": "c.107-12360T>C",
"is_canonical": false
},
{
"transcript_id": "ENST00000416073",
"hgnc_id": 8768,
"protein_id": "ENSP00000402535",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "O95831.210",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"synonymous_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000527892",
"hgnc_id": 8768,
"protein_id": "ENSP00000435955",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"stop_lost",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "2/16",
"strand": "-",
"coding_sequence_name": "c.130T>C",
"protein_sequence_name": "p.Ter44ArgextTer5",
"is_canonical": false
},
{
"transcript_id": "ENST00000529877",
"hgnc_id": 8768,
"protein_id": "ENSP00000432998",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"3_prime_UTR_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"3UTR",
"ncRNA"
],
"exon": "4/6",
"strand": "-",
"coding_sequence_name": "c.*134T>C",
"is_canonical": false
},
{
"transcript_id": "ENST00000533719",
"hgnc_id": 8768,
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "retained_intron",
"functional_annotations": [
"non_coding_transcript_exon_variant"
],
"region_annotations": [
"ncRNA_exonic"
],
"exon": "2/12",
"strand": "-",
"coding_sequence_name": "n.65T>C",
"is_canonical": false
},
{
"transcript_id": "ENST00000535724",
"hgnc_id": 8768,
"protein_id": "ENSP00000446113",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "O95831.210",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/17",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000674546",
"hgnc_id": 8768,
"protein_id": "ENSP00000501950",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000674555",
"hgnc_id": 8768,
"protein_id": "ENSP00000502183",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"3_prime_UTR_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"3UTR",
"ncRNA"
],
"exon": "4/17",
"strand": "-",
"coding_sequence_name": "c.*8T>C",
"is_canonical": false
},
{
"transcript_id": "ENST00000674722",
"hgnc_id": 8768,
"protein_id": "ENSP00000501693",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"synonymous_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "3/15",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000674957",
"hgnc_id": 8768,
"protein_id": "ENSP00000501985",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"upstream_gene_variant"
],
"region_annotations": [
"upstream"
],
"strand": "-",
"is_canonical": false
},
{
"transcript_id": "ENST00000674997",
"hgnc_id": 8768,
"protein_id": "ENSP00000502124",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"stop_lost",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "2/15",
"strand": "-",
"coding_sequence_name": "c.130T>C",
"protein_sequence_name": "p.Ter44ArgextTer5",
"is_canonical": false
},
{
"transcript_id": "ENST00000675037",
"hgnc_id": 8768,
"protein_id": "ENSP00000501724",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"synonymous_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000675050",
"hgnc_id": 8768,
"protein_id": "ENSP00000502606",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"synonymous_variant",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "3/17",
"strand": "-",
"coding_sequence_name": "c.261T>C",
"protein_sequence_name": "p.Asp87%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000675092",
"hgnc_id": 8768,
"protein_id": "ENSP00000501772",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": true
},
{
"transcript_id": "ENST00000675111",
"hgnc_id": 8768,
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "retained_intron",
"functional_annotations": [
"non_coding_transcript_exon_variant"
],
"region_annotations": [
"ncRNA_exonic"
],
"exon": "2/15",
"strand": "-",
"coding_sequence_name": "n.198T>C",
"is_canonical": false
},
{
"transcript_id": "ENST00000675240",
"hgnc_id": 8768,
"protein_id": "ENSP00000501907",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000675427",
"hgnc_id": 8768,
"protein_id": "ENSP00000501880",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000675774",
"hgnc_id": 8768,
"protein_id": "ENSP00000502690",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "nonsense_mediated_decay",
"functional_annotations": [
"stop_lost",
"NMD_transcript_variant"
],
"region_annotations": [
"exonic",
"ncRNA"
],
"exon": "2/8",
"strand": "-",
"coding_sequence_name": "c.130T>C",
"protein_sequence_name": "p.Ter44ArgextTer5",
"is_canonical": false
},
{
"transcript_id": "ENST00000675857",
"hgnc_id": 8768,
"protein_id": "ENSP00000502721",
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "protein_coding",
"functional_annotations": [
"synonymous_variant"
],
"region_annotations": [
"exonic"
],
"exon": "3/16",
"strand": "-",
"coding_sequence_name": "c.273T>C",
"protein_sequence_name": "p.Asp91%3D",
"is_canonical": false
},
{
"transcript_id": "ENST00000676048",
"hgnc_id": 8768,
"sift_prediction": "unknown",
"polyphen_prediction": "unknown",
"swiss_prot": "unknown",
"biotype": "retained_intron",
"functional_annotations": [
"non_coding_transcript_exon_variant"
],
"region_annotations": []
}
]
}
]
}
},
"planSummary": "IDHACK",
"keysExamined": 1,
"docsExamined": 1,
"nMatched": 1,
"nModified": 0,
"nUpserted": 0,
"numYields": 0,
"reslen": 16611,
"locks": {
"ParallelBatchWriterMode": {
"acquireCount": {
"r": 1
}
},
"ReplicationStateTransition": {
"acquireCount": {
"w": 1
}
},
"Global": {
"acquireCount": {
"w": 1
}
},
"Database": {
"acquireCount": {
"w": 1
}
},
"Collection": {
"acquireCount": {
"w": 1
}
},
"Mutex": {
"acquireCount": {
"r": 1
}
}
},
"flowControl": {
"acquireCount": 1,
"timeAcquiringMicros": 2
},
"storage": {
"data": {
"bytesRead": 26714,
"timeReadingMicros": 272548
}
},
"remote": "172.22.0.7:39302",
"protocol": "op_msg",
"durationMillis": 272
},
"truncated": {
"command": {
"update": {
"genes": {
"0": {
"transcripts": {
"21": {
"region_annotations": {
"0": {
"type": "string",
"size": 17
}
}
}
}
}
}
}
}
},
"size": {
"command": 16719
}
}
Update variants
{
"t": {
"$date": "2021-10-01T12:02:59.725+00:00"
},
"s": "I",
"c": "COMMAND",
"id": 51803,
"ctx": "conn5071",
"msg": "Slow query",
"attr": {
"type": "command",
"ns": "scout.$cmd",
"command": {
"update": "variant",
"ordered": false,
"lsid": {
"id": {
"$uuid": "eb0bf020-75fb-499d-985a-29c85e90567b"
}
},
"$db": "scout"
},
"numYields": 14,
"reslen": 60,
"locks": {
"ParallelBatchWriterMode": {
"acquireCount": {
"r": 5014
}
},
"ReplicationStateTransition": {
"acquireCount": {
"w": 5016
}
},
"Global": {
"acquireCount": {
"r": 2,
"w": 5014
}
},
"Database": {
"acquireCount": {
"w": 5014
}
},
"Collection": {
"acquireCount": {
"w": 5014
}
},
"Mutex": {
"acquireCount": {
"r": 5000
}
}
},
"flowControl": {
"acquireCount": 5014,
"timeAcquiringMicros": 4972
},
"storage": {
"data": {
"bytesRead": 1045257,
"timeReadingMicros": 1507
}
},
"remote": "172.22.0.7:47452",
"protocol": "op_msg",
"durationMillis": 1878
}
}
Issue Analytics
- State:
- Created 2 years ago
- Comments:9 (7 by maintainers)
Top Results From Across the Web
Handling Slow Queries In MongoDB Pt. 1 - Rockset
In this blog, we'll explore a few key ways to understand and address slow queries in MongoDB. We'll also take a look at...
Read more >
Analyze Slow Queries — MongoDB Atlas
The Query Profiler displays slow-running operations and their key performance statistics. You can explore a sample of historical queries for up to the...
Read more >
Troubleshooting MongoDB 100% CPU load and slow queries
Solution #1: db.currentOp() ... The db.currentOp() function lists the currently running queries with very detailed information. It also includes ...
Read more >
How to find queries not using indexes or slow in mongodb
With profiling enabled for a database, slow operations are written to the system.profile capped collection (which by default is 1Mb in size).
Read more >
Finding slow queries in MongoDB - Oodles Technologies
or slow queries and operations. ... By default, mongod records slow queries to its log, as defined by slowOpThresholdMs. ... Enabling database profiler...
Read more >
Top Related Medium Post
No results found
Top Related StackOverflow Question
No results found
Troubleshoot Live Code
Lightrun enables developers to add logs, metrics and snapshots to live code - no restarts or redeploys required.
Start Free
Top Related Reddit Thread
No results found
Top Related Hackernoon Post
No results found
Top Related Tweet
No results found
Top Related Dev.to Post
No results found
Top Related Hashnode Post
No results found
Top GitHub Comments
Reopening this as not all slow queries discussed here have been optimized!
2021-10-25T12:37:23.828+0200 I COMMAND [conn14176] command scout.variant command: find { find: "variant", filter: { $and: [ { case_id: "setgator" }, { $or: [ { acmg_classification: { $exists: true } }, { manual_rank: { $exists: true } }, { cancer_tier: { $exists: true } }, { dismiss_variant: { $exists: true } }, { mosaic_tags: { $exists: true } } ] } ] } } planSummary: IXSCAN { case_id: 1, category: 1, variant_type: 1, chromosome: 1, start: 1, end: 1 } keysExamined:6689 docsExamined:6689 cursorExhausted:1 keyUpdates:0 writeConflicts:0 numYields:78 nreturned:29 reslen:257126 locks:{ Global: { acquireCount: { r: 158 } }, Database: { acquireCount: { r: 79 } }, Collection: { acquireCount: { r: 79 } } } protocol:op_query 1251ms