From 30c0e587d8592e157ee4f26f4eeae28382613088 Mon Sep 17 00:00:00 2001
From: James Billingsley <jbillingsley@hsph.harvard.edu>
Date: Wed, 9 Jul 2025 12:17:48 -0400
Subject: [PATCH] adding_billingsley_olink_code

---
 .DS_Store                              | Bin 0 -> 6148 bytes
 404.html                               | 334 ----------------
 BillingsleyOlinkCode/SenOlinkRept1.Rmd | 502 +++++++++++++++++++++++++
 BillingsleyOlinkCode/SenOlinkRept3.Rmd | 480 +++++++++++++++++++++++
 4 files changed, 982 insertions(+), 334 deletions(-)
 create mode 100644 .DS_Store
 delete mode 100644 404.html
 create mode 100644 BillingsleyOlinkCode/SenOlinkRept1.Rmd
 create mode 100644 BillingsleyOlinkCode/SenOlinkRept3.Rmd
diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..389b6426573e16220757630fd5ec1c8c0ce91be7
GIT binary patch
literal 6148
zcmeHKQA@)x5WZ|vJ4D#S1Rn#w4(!|z#Fx^|XTgd-sLYiXE!IZX%^_pZXZ=I|5`T|(
zNv4b`_@X$ugUffh+$GDGk!t`z_@k%^Py+x5m9SLB<`bcD(gi75524U=B#^;1<d8zV
z5Y3Li$N-&P9ehae%u;MW&hHV9tmhUwpdf-c9Ow68szS_p3GdS+AG+?VD3vQKt831h
zv+i65H)<RV`@>Q0_Xp?HJ5eePXLdh4P2zE{R_~~6*iW)}pc9fP#*mA%B#YG8SEDS-
zb#7n=oT^jp)wU*+*4}PIHr@8LAt(F0ZbP<Pd(&yv+1%baIPTr1cbR%JBn$jMYT2+j
zgJ(3pz2ApmkfkcS#+=!Fvw0Yq0cL<1SY`(7o#w1B^CtMWGXu=Pk1#;@gG43tEEWd!
z)qxFtKGJxFkOXadOAtznp2fl-j-UvWifB@WePRfcj(%z5Jd1@vlMX_!jPux)g?*t2
zy*m1(4hP{G<dzv=2Id(kn{JKj|KZo~|M?{DF$2uNzhXdCx<S{+E!o<-usN!=67>p|
pgyIT=?<r{LQjD=winma;pkI=K=vgcbq6dXP1T+oYFay8Jz#GBLRdfIV

literal 0
HcmV?d00001

diff --git a/404.html b/404.html
deleted file mode 100644
index 8b5521d..0000000
--- a/404.html
+++ /dev/null
@@ -1,334 +0,0 @@
-
-<!doctype html>
-<html lang="en" class="no-js">
-  <head>
-    
-      <meta charset="utf-8">
-      <meta name="viewport" content="width=device-width,initial-scale=1">
-      
-      
-        <meta name="author" content="HBC">
-      
-      
-      
-      
-      
-      <link rel="icon" href="/hbctraining/Intro-to-R-mkdocs/./theme_figures/HBC_logo.png">
-      <meta name="generator" content="mkdocs-1.6.0, mkdocs-material-9.5.30">
-    
-    
-      
-        <title>Intro-to-R-mkdocs</title>
-      
-    
-    
-      <link rel="stylesheet" href="/hbctraining/Intro-to-R-mkdocs/assets/stylesheets/main.3cba04c6.min.css">
-      
-        
-        <link rel="stylesheet" href="/hbctraining/Intro-to-R-mkdocs/assets/stylesheets/palette.06af60db.min.css">
-      
-      
-
-
-    
-    
-      
-    
-    
-      
-        
-        
-        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
-        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Inter:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
-        <style>:root{--md-text-font:"Inter";--md-code-font:"Roboto Mono"}</style>
-      
-    
-    
-      <link rel="stylesheet" href="/hbctraining/Intro-to-R-mkdocs/stylesheets/extra.css">
-    
-    <script>__md_scope=new URL("/hbctraining/Intro-to-R-mkdocs/",location),__md_hash=e=>[...e].reduce((e,_)=>(e<<5)-e+_.charCodeAt(0),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
-    
-      
-
-    
-    
-    
-  </head>
-  
-  
-    
-    
-      
-    
-    
-    
-    
-    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="indigo">
-  
-    
-    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
-    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
-    <label class="md-overlay" for="__drawer"></label>
-    <div data-md-component="skip">
-      
-    </div>
-    <div data-md-component="announce">
-      
-    </div>
-    
-    
-
-
-<!-- Original Header -->
-
-
-<header class="md-header" data-md-component="header">
-  <nav class="md-header__inner md-grid" aria-label="Header">
-    <a href="/hbctraining/Intro-to-R-mkdocs/." title="Intro-to-R-mkdocs" class="md-header__button md-logo" aria-label="Intro-to-R-mkdocs" data-md-component="logo">
-      
-  <img src="/hbctraining/Intro-to-R-mkdocs/./theme_figures/HBC_logo_2.png" alt="logo">
-
-    </a>
-    <label class="md-header__button md-icon" for="__drawer">
-      
-      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3V6m0 5h18v2H3v-2m0 5h18v2H3v-2Z"/></svg>
-    </label>
-    <div class="md-header__title" data-md-component="header-title">
-      <div class="md-header__ellipsis">
-        <div class="md-header__topic">
-          <span class="md-ellipsis">
-            Intro-to-R-mkdocs
-          </span>
-        </div>
-        <div class="md-header__topic" data-md-component="header-topic">
-          <span class="md-ellipsis">
-            
-              
-            
-          </span>
-        </div>
-      </div>
-    </div>
-    
-      
-        <form class="md-header__option" data-md-component="palette">
-  
-    
-    
-    
-    <input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="indigo"  aria-label="Switch to light mode"  type="radio" name="__palette" id="__palette_0">
-    
-      <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9h-1.9M20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69m-9.15 3.96h2.3L12 9l-1.15 3.65Z"/></svg>
-      </label>
-    
-  
-    
-    
-    
-    <input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="blue-grey" data-md-color-accent="indigo"  aria-label="Switch to dark mode"  type="radio" name="__palette" id="__palette_1">
-    
-      <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
-      </label>
-    
-  
-    
-    
-    
-    <input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="blue-grey" data-md-color-accent="indigo"  aria-label="Switch to system preference"  type="radio" name="__palette" id="__palette_2">
-    
-      <label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12c0-2.42-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12 20 8.69Z"/></svg>
-      </label>
-    
-  
-</form>
-      
-    
-    
-      <script>var media,input,key,value,palette=__md_get("__palette");if(palette&&palette.color){"(prefers-color-scheme)"===palette.color.media&&(media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']"),palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent"));for([key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
-    
-    
-    
-      <label class="md-header__button md-icon" for="__search">
-        
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
-      </label>
-      <div class="md-search" data-md-component="search" role="dialog">
-  <label class="md-search__overlay" for="__search"></label>
-  <div class="md-search__inner" role="search">
-    <form class="md-search__form" name="search">
-      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
-      <label class="md-search__icon md-icon" for="__search">
-        
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.516 6.516 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5Z"/></svg>
-        
-        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11h12Z"/></svg>
-      </label>
-      <nav class="md-search__options" aria-label="Search">
-        
-        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
-          
-          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12 19 6.41Z"/></svg>
-        </button>
-      </nav>
-      
-    </form>
-    <div class="md-search__output">
-      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
-        <div class="md-search-result" data-md-component="search-result">
-          <div class="md-search-result__meta">
-            Initializing search
-          </div>
-          <ol class="md-search-result__list" role="presentation"></ol>
-        </div>
-      </div>
-    </div>
-  </div>
-</div>
-    
-    
-      <div class="md-header__source">
-        <a href="https://github.com/hbctraining/Intro-to-R-mkdocs" title="Go to repository" class="md-source" data-md-component="source">
-  <div class="md-source__icon md-icon">
-    
-    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81z"/></svg>
-  </div>
-  <div class="md-source__repository">
-    hbctraining/Intro-to-R-mkdocs
-  </div>
-</a>
-      </div>
-    
-  </nav>
-  
-</header>
-
-    <div class="md-container" data-md-component="container">
-      
-      
-        
-          
-            
-<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
-  <div class="md-grid">
-    <ul class="md-tabs__list">
-      
-        
-  
-  
-  
-    <li class="md-tabs__item">
-      <a href="/hbctraining/Intro-to-R-mkdocs/." class="md-tabs__link">
-        
-  
-    
-  
-  Home
-
-      </a>
-    </li>
-  
-
-      
-        
-  
-  
-  
-    <li class="md-tabs__item">
-      <a href="/hbctraining/Intro-to-R-mkdocs/Workshop_Schedule/" class="md-tabs__link">
-        
-  
-    
-  
-  Workshop Schedule
-
-      </a>
-    </li>
-  
-
-      
-    </ul>
-  </div>
-</nav>
-          
-        
-      
-      <main class="md-main" data-md-component="main">
-        <div class="md-main__inner md-grid">
-          
-  <div class="md-sidebar md-sidebar--primary" data-md-component="navigation">
-    <div class="md-sidebar__scrollwrap">
-      <div class="md-sidebar__inner">
-        
-
-<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
-  
-  
-  
-  
-</nav>
-      </div>
-    </div>
-  </div>
-
-          
-            <div class="md-content" data-md-component="content">
-              <article class="md-content__inner md-typeset">
-                
-  <h1>404 - Not found</h1>
-
-              </article>
-            </div>
-          
-          
-<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
-        </div>
-        
-          <button type="button" class="md-top md-icon" data-md-component="top" hidden>
-  
-  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8v12Z"/></svg>
-  Back to top
-</button>
-        
-      </main>
-      
-        <footer class="md-footer">
-  
-  <div class="md-footer-meta md-typeset">
-    <div class="md-footer-meta__inner md-grid">
-      <div class="md-copyright">
-  
-  
-    Made with
-    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
-      Material for MkDocs
-    </a>
-  
-</div>
-      
-    </div>
-  </div>
-</footer>
-      
-    </div>
-    <div class="md-dialog" data-md-component="dialog">
-      <div class="md-dialog__inner md-typeset"></div>
-    </div>
-    
-    
-    <script id="__config" type="application/json">{"base": "/hbctraining/Intro-to-R-mkdocs/", "features": ["navigation.tracking", "navigation.sections", "navigation.expand", "navigation.top", "navigation.tabs", "navigation.prune", "content.code.copy", "content.code.annotate"], "search": "/hbctraining/Intro-to-R-mkdocs/assets/javascripts/workers/search.b8dbb3d2.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
-    
-    
-      <script src="/hbctraining/Intro-to-R-mkdocs/assets/javascripts/bundle.fe8b6f2b.min.js"></script>
-      
-        <script src="/hbctraining/Intro-to-R-mkdocs/javascripts/mathjax.js"></script>
-      
-        <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
-      
-        <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
-      
-    
-  </body>
-</html>
\ No newline at end of file
diff --git a/BillingsleyOlinkCode/SenOlinkRept1.Rmd b/BillingsleyOlinkCode/SenOlinkRept1.Rmd
new file mode 100644
index 0000000..ab378a8
--- /dev/null
+++ b/BillingsleyOlinkCode/SenOlinkRept1.Rmd
@@ -0,0 +1,502 @@
+
+---
+title: "SenOlinkReport1"
+author: "James Billingsley"
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    code_folding: hide
+---
+
+```{r setup, include=FALSE, message=FALSE, warning = FALSE}
+# Chunk defaults for the whole report: echo code, keep knitting past errors,
+# centre figures at 75% width, and hide messages and warnings.
+knitr::opts_chunk$set(
+  echo = TRUE, error = TRUE, message = FALSE, warning = FALSE,
+  fig.align = "center", out.width = "75%",
+  fig.path = "/Users/jmb714/Desktop/figures/"
+)
+
+# Client and analyst details; some are interpolated into the report text.
+clientname <- "Pritha Sen"
+labPI <- "Pritha Sen"
+clientemail <- "PSEN@BWH.HARVARD.EDU"
+lablocation <- "BWH"
+analystname <- "James Billingsley"
+analystemail <- "jbillingsley@hsph.harvard.edu"
+# Fixed seed, then the raw-data directory stored under both variable names.
+set.seed(42)
+data_dir <- paste0(
+  "/Users/jmb714/Harvard University Dropbox/",
+  "HBC Team Folder (1)/Consults/pritha_sen/",
+  "sen_olink_human_blood_mpox_hbc05277/data/"
+)
+datadir <- data_dir
+```
+
+**Olink REVEAL analysis of mpox samples [hbc05277] `r clientname`.**
+
+
+Contact `r analystname` (`r analystemail`) for additional details.
+
+The most recent update of this html document occurred: `r date()`
+
+
+The raw data are here: 
+
+```{r datadir}
+cat(datadir)  # print the raw-data directory path into the rendered report
+```
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+Olink Reveal data; Examining serum proteins in:
+
+1. Time course analysis of donors infected with Mpox, acute and resolution phases. 18 donors: most with 3 timepoints, one with 4, and some with a single timepoint.
+
+2. Time course analysis of participants receiving JYNNEOS mpox/smallpox prime-boost vaccine. Three timepoints: baseline (dose1), boost (dose2), and postvacc.
+
+
+We have a single plate, 96 samples and 1037 assays. Some of the 96 samples are plate control samples and some of the assays are control assays.
+
+
+We'll examine each of the two experiments individually, and combined.
+
+Olink data (NPX) generally comes normalized with assay and sample QC flags from Olink. The experiment here uses the Reveal technology which measures approx 1000 proteins per well. 
+
+This experiment is a single plate experiment so no further normalization was required. If you have a multiplate experiment, additional normalization, such as "bridging normalization" (between plates), will likely be required. This is readily done using the OlinkAnalyze R package. The package also provides convenient QC plotting functions and differential expression functions, both of which I use in this analysis.
+
+For QC, I look for flagged samples and assays (no sample flags, 2 assay flags). I also do PCA plots looking for sample outliers (none found), and look for poorly performing assays.
+
+To check for poorly performing assays, I look, for each assay, at the number of data points that fall below the Limit Of Detection (LOD). In a multiplate experiment you can calculate the LOD for each assay using Sample controls on each plate, but for a single plate experiment you cannot. In a case such as ours, Olink provides an LOD file that can be used as a substitute.
+
+I plotted the percentage of below LOD data points for each assay. There was no obvious inflection point on the plot, so I chose to exclude any assays with greater than 95% below LOD data points (note that the example filter coded in this report uses a 90% cutoff). I think this is a sensible threshold for this technology and experiment. As a sanity check I also ran differential expression with no assays removed, and the results were quite similar.
+
+Note, in this report I did not remove the two qc flagged assays. Please see the differential expression analyses reports for this project, for examples of filtering flagged assays.
+
+
+https://github.com/Olink-Proteomics/OlinkRPackage
+
+https://cran.r-project.org/web/packages/OlinkAnalyze/vignettes/LOD.html
+
+
+
+```{r libraries}
+library(tidyverse)
+library(Matrix)
+library(RCurl)
+library(knitr)
+library(patchwork)
+library(gridExtra)
+library(reshape2)
+library(Matrix.utils)
+library(DESeq2)
+library(EnhancedVolcano)
+library(OlinkAnalyze)
+library(readxl)
+```
+
+
+```{r readin}
+# Load the semicolon-delimited Olink NPX export.
+mpxv <- read.table(
+  file = "~/Desktop/MPXV_June2025_Extended_NPX_2025-06-17.csv",
+  sep = ";", header = TRUE # spelled out; `h` relied on partial matching
+)
+# Quick look at the column types and the overall NPX distribution.
+glimpse(mpxv)
+hist(mpxv$NPX)
+# Count distinct samples, assays, panels, wells and plates in the file.
+mpxv %>%
+  summarise(
+    n_samples   = n_distinct(SampleID),
+    n_olink_ids = n_distinct(OlinkID),
+    n_assays    = n_distinct(Assay),
+    n_panels    = n_distinct(Panel),
+    n_wells     = n_distinct(WellID),
+    n_plate_ids = n_distinct(PlateID)
+  )
+```
+<br/>
+<br/>
+<br/>
+<br/>
+
+Some plots of samples in the full dataset
+
+```{r plotsA}
+# Shared theme tweak: small, vertical x-axis tick labels.
+small_x_labels <- theme(
+  axis.text.x = element_text(angle = 90, hjust = 1, size = 5)
+)
+
+# Full dataset, control samples included: distribution, QC and PCA plots,
+# each coloured by the SampleQC column.
+olink_dist_plot(mpxv, color_g = "SampleQC") + small_x_labels
+
+olink_qc_plot(mpxv, label_outliers = TRUE, color_g = "SampleQC")
+
+olink_pca_plot(mpxv, label_samples = TRUE, color_g = "SampleQC")
+
+# Drop every row whose SampleID contains "Control".
+is_control <- str_detect(mpxv$SampleID, regex("Control"))
+npx_flt <- filter(mpxv, !is_control)
+
+# Same three plots on the filtered data.
+olink_dist_plot(npx_flt, color_g = "SampleQC") +
+  small_x_labels +
+  ggtitle("Control samples removed")
+
+olink_qc_plot(npx_flt, label_outliers = TRUE, color_g = "SampleQC") +
+  ggtitle("Control samples removed")
+
+olink_pca_plot(npx_flt, label_samples = TRUE, color_g = "SampleQC")
+```
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+Assay QC
+
+We'll look for assays that have a high percentage of normalized counts below published LOD.
+
+
+```{r qcAssays}
+# Add LOD information to the NPX table from the fixed-LOD file.
+mpxv <- olink_lod(
+  mpxv,
+  lod_method = "FixedLOD",
+  lod_file_path = "~/Desktop/Reveal_Fixed_LOD_csv_file.csv"
+)
+
+# Per assay: share of values at or below LOD, and the NPX sd/mean ratio.
+# NOTE(review): mpxv still contains the control samples at this point;
+# confirm they are meant to count towards pct_below_lod.
+assay_groups <- group_by(mpxv, OlinkID, Assay, AssayQC)
+assay_qc_summary <- assay_groups %>%
+  summarise(
+    pct_below_lod = mean(NPX <= LOD, na.rm = TRUE),
+    cv = sd(NPX, na.rm = TRUE) / mean(NPX, na.rm = TRUE),
+    .groups = "drop"
+  ) %>%
+  arrange(desc(pct_below_lod), desc(cv))
+
+# Number of assays with more than 90% of values below LOD.
+summarise(filter(assay_qc_summary, pct_below_lod > 0.90), num_assays = n())
+
+# Rank assays from the lowest to the highest below-LOD share, then plot.
+ranked_assays <- arrange(assay_qc_summary, pct_below_lod)
+ranked_assays <- mutate(ranked_assays, order = row_number())
+ggplot(ranked_assays, aes(x = order, y = pct_below_lod)) +
+  geom_point(color = "steelblue") +
+  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
+  ggtitle("Percentage Below LOD by Assay (Ranked)") +
+  xlab("Assay Rank (Low to High % Below LOD)") +
+  ylab("% of Samples Below LOD") +
+  theme_minimal()
+```
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+The QC filter cutoff we use to reduce noise (if we use one) is somewhat arbitrary, but here I'll use 90% as an example.
+
+70 assays have > 90 % data points below LOD
+
+If I use a 95% threshold it removes 35 assays
+
+If I use a 99% threshold it removes 10 assays
+
+
+
+```{r filter_low_detect}
+# Assays to drop: more than 90% of values below LOD. The strict ">" matches
+# the count reported in the assay QC summary (this filter used ">=").
+low_detect_assays <- assay_qc_summary %>%
+  filter(pct_below_lod > 0.90) %>%
+  pull(OlinkID)
+control_assays <- c(
+  "Amplification control", "Extension control", "Incubation control"
+)
+
+# Remove low-detection assays, control samples and control assays.
+clean_mpxv <- mpxv %>%
+  filter(!OlinkID %in% low_detect_assays) %>%
+  filter(!str_detect(SampleID, regex("Control"))) %>%
+  filter(!Assay %in% control_assays)
+
+olink_pca_plot(clean_mpxv, color_g = "SampleQC", label_samples = TRUE)
+```
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+
+```{r annotateFull}
+# Sample annotations supplied as an Excel sheet.
+nat_ann <- read_excel("~/Desktop/250624 Annotations for MPXJYN Olink.xlsx")
+
+ann_cols <- select(nat_ann, sample_id, ns_annotation = `NS Annotation`)
+
+# One row per SampleID/WellID pair, sorted by SampleID; donor is the
+# SampleID minus its last two characters, and the annotation is joined on.
+header_full_df <- clean_mpxv %>%
+  distinct(SampleID, WellID) %>%
+  arrange(SampleID) %>%
+  mutate(donor = str_sub(SampleID, 1, -3)) %>%
+  left_join(ann_cols, by = c("SampleID" = "sample_id"))
+
+# Experiment label assigned purely by row position.
+# NOTE(review): assumes the first 45 sorted SampleIDs are JYNNEOS and the
+# remaining 41 are Mpox; re-check whenever the upstream filtering changes.
+expt <- rep(c("JYN", "MPX"), times = c(45, 41))
+header_full_df$expt <- expt
+
+# Wide NPX table: one row per sample, one column per assay, with missing
+# sample/assay combinations filled with 0.
+npx_long <- select(clean_mpxv, SampleID, Assay, NPX)
+counts_wide_full <- arrange(
+  pivot_wider(
+    npx_long,
+    names_from = Assay, values_from = NPX, values_fill = 0
+  ),
+  SampleID
+)
+
+# Metadata columns first, then the per-assay NPX columns.
+final_df_full <-
+  left_join(header_full_df, counts_wide_full, by = "SampleID")
+```
+<br/>
+<br/>
+<br/>
+<br/>
+
+PCA MPX alone
+
+
+```{r pcampx}
+# PCA on the Mpox rows (46-86) of final_df_full; the first five columns are
+# sample metadata, so only the per-assay NPX columns are decomposed.
+pca <- prcomp(final_df_full[c(46:86), -c(1:5)], scale. = TRUE)
+
+# PC scores plus the metadata used to label and colour the plots.
+pca_scores <- as.data.frame(pca$x) %>%
+  mutate(
+    SampleID = final_df_full$SampleID[c(46:86)],
+    Donor = final_df_full$donor[c(46:86)],
+    NS_ann = final_df_full$ns_annotation[c(46:86)],
+    Expt = final_df_full$expt[c(46:86)]
+  )
+
+# ggrepel is not attached by the libraries chunk, so the repel layer is
+# called through its namespace rather than as a bare geom_text_repel().
+ggplot(pca_scores, aes(x = PC1, y = PC2, color = NS_ann, label = SampleID)) +
+  geom_point(size = 3) +
+  ggrepel::geom_text_repel(size = 3, max.overlaps = Inf) +
+  labs(
+    title = "PCA of Mpx data alone",
+    x = paste0("PC1 (", round(100 * summary(pca)$importance[2, 1], 1), "% variance)"),
+    y = paste0("PC2 (", round(100 * summary(pca)$importance[2, 2], 1), "% variance)")
+  ) +
+  theme_minimal()
+
+# Donor-coloured views: PC1 vs PC2, then PC1 vs PC3.
+ggplot(pca_scores, aes(x = PC1, y = PC2, color = Donor)) +
+  geom_point(size = 3) +
+  labs(
+    title = "PCA of Mpx data alone",
+    x = paste0("PC1 (", round(100 * summary(pca)$importance[2, 1], 1), "% variance)"),
+    y = paste0("PC2 (", round(100 * summary(pca)$importance[2, 2], 1), "% variance)")
+  ) +
+  theme_minimal()
+
+ggplot(pca_scores, aes(x = PC1, y = PC3, color = Donor)) +
+  geom_point(size = 3) +
+  labs(
+    title = "PCA of Mpx data alone",
+    x = paste0("PC1 (", round(100 * summary(pca)$importance[2, 1], 1), "% variance)"),
+    y = paste0("PC3 (", round(100 * summary(pca)$importance[2, 3], 1), "% variance)")
+  ) +
+  theme_minimal()
+``` 
+  
+Very good segregation by time (and donor)
+ 
+<br/>
+<br/>
+<br/>
+<br/>
+
+PCA JYNNEOS alone
+
+```{r pca_JYN}
+# PCA on the JYNNEOS rows (1-45) of final_df_full; the first five columns
+# are dropped so only the per-assay columns are decomposed.
+jyn_rows <- 1:45
+pca_jyn <- prcomp(final_df_full[jyn_rows, -(1:5)], scale. = TRUE)
+jyn_imp <- summary(pca_jyn)$importance # row 2 feeds the axis labels
+
+pca_scores_jyn <- mutate(
+  as.data.frame(pca_jyn$x),
+  sample_id     = final_df_full$SampleID[jyn_rows],
+  donor         = final_df_full$donor[jyn_rows],
+  ns_annotation = final_df_full$ns_annotation[jyn_rows],
+  expt          = final_df_full$expt[jyn_rows]
+)
+
+# PC1 vs PC2, coloured by annotation.
+jyn_x <- sprintf(
+  "PC1 (%0.1f%% variance)",
+  100 * jyn_imp[2, 1]
+)
+jyn_y <- sprintf(
+  "PC2 (%0.1f%% variance)",
+  100 * jyn_imp[2, 2]
+)
+ggplot(
+  pca_scores_jyn,
+  aes(x = PC1, y = PC2, color = ns_annotation)
+) +
+  geom_point(size = 3) +
+  ggtitle("PCA of JYN data alone") +
+  xlab(jyn_x) + ylab(jyn_y) +
+  theme_minimal()
+
+# PC1 vs PC2, coloured by donor.
+jyn_x <- sprintf(
+  "PC1 (%0.1f%% variance)",
+  100 * jyn_imp[2, 1]
+)
+jyn_y <- sprintf(
+  "PC2 (%0.1f%% variance)",
+  100 * jyn_imp[2, 2]
+)
+ggplot(
+  pca_scores_jyn,
+  aes(x = PC1, y = PC2, color = donor)
+) +
+  geom_point(size = 3) +
+  ggtitle("PCA of JYN data alone") +
+  xlab(jyn_x) + ylab(jyn_y) +
+  theme_minimal()
+
+# PC1 vs PC3, coloured by annotation.
+jyn_x <- sprintf(
+  "PC1 (%0.1f%% variance)",
+  100 * jyn_imp[2, 1]
+)
+jyn_y <- sprintf(
+  "PC3 (%0.1f%% variance)",
+  100 * jyn_imp[2, 3]
+)
+ggplot(
+  pca_scores_jyn,
+  aes(x = PC1, y = PC3, color = ns_annotation)
+) +
+  geom_point(size = 3) +
+  ggtitle("PCA of JYN data alone") +
+  xlab(jyn_x) + ylab(jyn_y) +
+  theme_minimal()
+
+# PC3 vs PC2, coloured by annotation.
+jyn_x <- sprintf(
+  "PC3 (%0.1f%% variance)",
+  100 * jyn_imp[2, 3]
+)
+jyn_y <- sprintf(
+  "PC2 (%0.1f%% variance)",
+  100 * jyn_imp[2, 2]
+)
+ggplot(
+  pca_scores_jyn,
+  aes(x = PC3, y = PC2, color = ns_annotation)
+) +
+  geom_point(size = 3) +
+  ggtitle("PCA of JYN data alone") +
+  xlab(jyn_x) + ylab(jyn_y) +
+  theme_minimal()
+```
+
+
+Some segregation by time (0 is different from 56) and by donor.
+
+<br/>
+<br/>
+<br/>
+<br/>
+  
+
+PCA all data
+
+
+```{r pca_full}
+# PCA across every sample in final_df_full; the first five columns are
+# dropped so only the per-assay columns are decomposed.
+assay_cols_full <- final_df_full[, -(1:5)]
+pca_full <- prcomp(assay_cols_full, scale. = TRUE)
+full_imp <- summary(pca_full)$importance # row 2 feeds the axis labels
+
+# PC scores with the sample metadata used for colouring.
+pca_scores_full <- mutate(
+  as.data.frame(pca_full$x),
+  sample_id     = final_df_full$SampleID,
+  donor         = final_df_full$donor,
+  ns_annotation = final_df_full$ns_annotation,
+  expt          = final_df_full$expt
+)
+
+# Title and axis labels are identical for both plots, so they are built
+# once here.
+full_title <- "PCA of all data combined"
+
+full_x <- sprintf(
+  "PC1 (%0.1f%% variance)",
+  100 * full_imp[2, 1]
+)
+full_y <- sprintf(
+  "PC2 (%0.1f%% variance)",
+  100 * full_imp[2, 2]
+)
+
+# PC1 vs PC2, coloured by experiment.
+ggplot(
+  pca_scores_full,
+  aes(x = PC1, y = PC2, color = expt)
+) +
+  geom_point(size = 3) +
+  ggtitle(full_title) +
+  xlab(full_x) +
+  ylab(full_y) +
+  theme_minimal()
+
+# PC1 vs PC2, coloured by annotation.
+ggplot(
+  pca_scores_full,
+  aes(x = PC1, y = PC2, color = ns_annotation)
+) +
+  geom_point(size = 3) +
+  ggtitle(full_title) +
+  xlab(full_x) +
+  ylab(full_y) +
+  theme_minimal()
+```
+<br/>
+<br/>
+<br/>
+<br/>
+
+The Mpx and JYN data segregate fairly well, with no obvious clustering of the JYN data with a specific cluster of Mpx data (maybe closest to timepoint 2?).
+
+```{r sessionInfo}
+sessionInfo()  # record the R version and package versions used for this run
+```
diff --git a/BillingsleyOlinkCode/SenOlinkRept3.Rmd b/BillingsleyOlinkCode/SenOlinkRept3.Rmd
new file mode 100644
index 0000000..8cc1048
--- /dev/null
+++ b/BillingsleyOlinkCode/SenOlinkRept3.Rmd
@@ -0,0 +1,480 @@
+
+---
+title: "SenOlinkReport3"
+author: "James Billingsley"
+date: "`r Sys.Date()`"
+output:
+  html_document:
+    code_folding: hide
+---
+
+```{r setup, include=FALSE, message=FALSE, warning = FALSE}
+# Chunk defaults applied to every chunk in the report.
+knitr::opts_chunk$set(
+  echo = TRUE, error = TRUE,
+  message = FALSE, warning = FALSE,
+  fig.align = "center", out.width = "75%",
+  fig.path = "/Users/jmb714/Desktop/figures/"
+)
+
+# Client and analyst details referenced by the inline text below.
+clientname <- "Pritha Sen"
+clientemail <- "PSEN@BWH.HARVARD.EDU"
+labPI <- "Pritha Sen"
+lablocation <- "BWH"
+analystname <- "James Billingsley"
+analystemail <- "jbillingsley@hsph.harvard.edu"
+
+set.seed(42)
+data_dir <- paste0(
+  "/Users/jmb714/Harvard University Dropbox/",
+  "HBC Team Folder (1)/Consults/pritha_sen/",
+  "sen_olink_human_blood_mpox_hbc05277/data/"
+)
+datadir <- data_dir
+```
+
+
+**Olink REVEAL analysis of mpox samples [hbc05277] `r clientname`.**
+
+
+Contact `r analystname` (`r analystemail`) for additional details.
+
+The most recent update of this html document occurred: `r date()`
+
+
+The raw data are here: 
+
+```{r datadir}
+cat(datadir) # print the raw-data directory path into the report
+```
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+This report examines differential expression/abundance of serum proteins in the JYNNEOS Mpox prime-boost vaccine experiment. We have serial timepoints representing baseline (prime), boost, and two followup timepoints, i.e. D0, D28, D56 and D180.
+
+We will use linear mixed effects modeling and control for donor effects.
+
+We can look for proteins that change significantly over time, and also do post-tests to compare specific binary contrasts.
+
+In the QC process (slightly modified here from SenOlinkRept1.Rmd), we remove any assays that have ≥ 95% of their data points at or below LOD, and we remove the two QC-flagged assays.
+
+This removes 79 assays leaving 953.
+
+We also run the analysis without removing these high LOD assays for comparison.
+
+
+
+
+```{r libraries}
+library(OlinkAnalyze)
+library(knitr)
+library(patchwork)
+library(tidyverse)
+library(readxl)
+library(UpSetR)
+library(ComplexHeatmap)
+library(forcats)
+```
+
+
+```{r readin}
+# Read the semicolon-delimited extended NPX export. `header` is spelled out
+# in full rather than abbreviated, so it does not rely on partial matching.
+MPXV <- read.table(
+  file = "~/Desktop/MPXV_June2025_Extended_NPX_2025-06-17.csv",
+  sep = ";",
+  header = TRUE
+)
+
+# Add LOD data from the fixed-LOD reference file.
+MPXV <- olink_lod(
+  MPXV,
+  lod_file_path = "~/Desktop/Reveal_Fixed_LOD_csv_file.csv",
+  lod_method = "FixedLOD"
+)
+
+# Remove control samples (any SampleID containing "Control").
+no_ctrl_MPXV <- MPXV %>%
+  filter(!str_detect(SampleID, regex("Control")))
+
+# Remove the control assays.
+no_ctrl_MPXV <- no_ctrl_MPXV %>%
+  filter(!Assay %in% c(
+    "Amplification control",
+    "Extension control",
+    "Incubation control"
+  ))
+
+# Remove QC-flagged assays (n = 2: "NPM1" and "SDHAF4").
+warn_assays <- no_ctrl_MPXV %>%
+  filter(AssayQC == "WARN") %>%
+  pull(Assay) %>%
+  unique()
+
+no_ctrl_MPXV <- no_ctrl_MPXV %>%
+  filter(!Assay %in% warn_assays)
+
+# length(unique(no_ctrl_MPXV$Assay)) # 1032
+
+# Remove assays with >= 95% of data points at or below their published LOD.
+# `pct_below_LOD` is a proportion in [0, 1], hence the 0.95 cut-off.
+assay_qc_summary <- no_ctrl_MPXV %>%
+  group_by(Assay) %>%
+  summarise(
+    pct_below_LOD = mean(NPX <= LOD)
+  )
+
+low_detect_assays <- assay_qc_summary %>%
+  filter(pct_below_LOD >= 0.95) %>%
+  pull(Assay)
+
+clean_MPXV <- no_ctrl_MPXV %>%
+  filter(!Assay %in% low_detect_assays)
+
+
+NatAnn <- read_excel("~/Desktop/250624 Annotations for MPXJYN Olink.xlsx")
+
+# Fix a typo in the annotation sheet: the sample recorded as "JYN14-4" should
+# be "JYN14-5". Match on the value rather than the hard-coded row index (62)
+# so the fix still lands on the right row if the sheet is re-sorted or edited.
+NatAnn$sample_id[NatAnn$sample_id %in% "JYN14-4"] <- "JYN14-5"
+
+# One row per sample, sorted by SampleID. Donor is the SampleID minus its
+# last two characters; the NS annotation is joined in from the sheet.
+header_Fulldf <- clean_MPXV %>%
+  distinct(SampleID) %>%
+  arrange(SampleID) %>%
+  mutate(
+    Donor = str_sub(SampleID, 1, -3)
+  ) %>%
+  left_join(
+    NatAnn %>%
+      select(sample_id, `NS Annotation`) %>%
+      rename(NS_annotation = `NS Annotation`),
+    by = c("SampleID" = "sample_id")
+  )
+
+# Experiment labels are positional: the first 45 sorted samples are labelled
+# JYN and the remaining 41 MPX.
+# NOTE(review): this assumes exactly 86 samples in this sort order - confirm
+# whenever the input file changes.
+Expt <- c(rep("JYN", times = 45), rep("MPX", times = 41))
+
+header_Fulldf$Expt <- Expt
+
+header_Fulldf$Donor <- factor(header_Fulldf$Donor)
+
+header_Fulldf$NS_annotation <- factor(header_Fulldf$NS_annotation)
+
+# NOTE(review): adds a factor copy as `NExpt`; `Expt` itself stays character.
+header_Fulldf$NExpt <- factor(header_Fulldf$Expt)
+
+
+# Attach the per-sample annotation columns to the long-format NPX table.
+annotated_long <- left_join(clean_MPXV, header_Fulldf, by = "SampleID")
+
+
+# Restrict to the JYN experiment.
+JYN_long <- filter(annotated_long, Expt == "JYN")
+
+
+# Print the distinct sample / annotation pairs as a sanity check.
+distinct(JYN_long, SampleID, NS_annotation)
+
+
+# Relevel: move the four timepoints, in chronological order, to the end of
+# the NS_annotation levels.
+JYN_long$NS_annotation <- fct_relevel(
+  JYN_long$NS_annotation,
+  "D0", "D28", "D56", "D180",
+  after = Inf
+)
+
+# Mixed model with NS_annotation as the variable of interest and Donor as
+# the random term.
+JYN_lmer_results <- olink_lmer(
+  JYN_long,
+  random = "Donor",
+  variable = "NS_annotation"
+)
+
+
+# write.table(JYN_lmer_results, file="~/Desktop/JYN_lmer_results.txt", sep="\t", col.names=NA)
+
+
+# Post-hoc contrasts on the NS_annotation effect, using the same model terms.
+JYN_posthoc <- olink_lmer_posthoc(
+  JYN_long,
+  effect = "NS_annotation",
+  random = "Donor",
+  variable = "NS_annotation",
+  verbose = TRUE
+)
+
+# write.table(JYN_posthoc, file="~/Desktop/JYN_lmer_results_posthoc.txt", sep="\t", col.names=NA)
+
+# write.table(JYN_posthoc, file="~/Desktop/JYN_lmer_results_posthoc.txt", sep="\t", col.names=NA)
+
+
+# Count the significant proteins within each pairwise contrast.
+signif_counts <- JYN_posthoc %>%
+  filter(Threshold == "Significant") %>%
+  group_by(contrast) %>%
+  summarise(n_signif = n(), .groups = "drop")
+
+# Horizontal bar chart of those counts; contrasts are ordered by count.
+ggplot(
+  signif_counts,
+  aes(x = fct_reorder(contrast, n_signif, .desc = TRUE), y = n_signif)
+) +
+  geom_col(fill = "steelblue") +
+  coord_flip() +
+  labs(
+    title = "DEPs per Contrast from NS_annotation",
+    x     = "Pairwise Contrast",
+    y     = "Number of Significant Proteins"
+  ) +
+  theme_minimal()
+
+# One list element per contrast, holding that contrast's significant assays.
+sig_sets <- JYN_posthoc %>%
+  filter(Threshold == "Significant") %>%
+  distinct(Assay, contrast) %>%
+  group_by(contrast) %>%
+  summarise(proteins = list(Assay), .groups = "drop")
+
+sig_list <- sig_sets$proteins
+names(sig_list) <- sig_sets$contrast
+
+# UpSet plot of the overlaps between the per-contrast protein sets.
+upset(
+  fromList(sig_list),
+  nsets = 6,
+  nintersects = NA,
+  order.by = "freq",
+  mb.ratio = c(0.6, 0.4),
+  main.bar.color = "steelblue",
+  sets.bar.color = "skyblue"
+)
+
+
+
+# Assays whose NS_annotation term is significant in the mixed model.
+sig_proteins_lmm <- JYN_lmer_results %>%
+  filter(term == "NS_annotation", Threshold == "Significant") %>%
+  pull(Assay)
+
+sig_npx <- JYN_long %>%
+  filter(Assay %in% sig_proteins_lmm)
+
+# Assay x sample NPX matrix. Row names are set with column_to_rownames() on
+# a plain data frame, because rownames<- is deprecated on tibbles.
+mat <- sig_npx %>%
+  select(SampleID, Assay, NPX) %>%
+  pivot_wider(names_from = SampleID, values_from = NPX) %>%
+  as.data.frame() %>%
+  column_to_rownames("Assay") %>%
+  as.matrix()
+
+# Row-wise z-scores: scale() works on columns, hence the double transpose.
+mat_scaled <- t(scale(t(mat)))
+
+my_pal <- colorRampPalette(c("blue", "white", "red"))(256)
+
+Heatmap(
+  mat_scaled,
+  name = "Z-score",
+  cluster_rows = TRUE,
+  cluster_columns = TRUE,
+  col = my_pal,
+  show_row_names = TRUE,
+  show_column_names = TRUE,
+  column_title = "Samples",
+  row_title = "Significant Proteins"
+)
+```
+<br/>
+<br/>
+
+Comparing significantly differentially expressed proteins between D0 and D56, there are 168 proteins significantly downregulated at D56 and 21 proteins significantly upregulated at D56.
+
+Significance is at an adjusted p value of < 0.05.
+
+In the output file "estimate" represents log2FC.
+
+<br/>
+<br/>
+<br/>
+<br/>
+
+Some example boxplots
+
+
+```{r plotsB}
+# Significant proteins for the "D0 - D56" contrast, ordered by adjusted
+# p-value.
+baseline_vs_t3 <- JYN_posthoc %>%
+  filter(
+    str_detect(contrast, "D0 - D56"),
+    Threshold == "Significant"
+  ) %>%
+  arrange(Adjusted_pval)
+
+
+# baseline_vs_t3 %>% filter(estimate > 0) %>% nrow()#168
+
+# baseline_vs_t3 %>% filter(estimate < 0) %>% nrow()#21
+
+
+# head(baseline_vs_t3 %>% filter(estimate > 0))
+
+# baseline_vs_t3 %>% filter(estimate < 0)
+
+
+# Two sets of example proteins, plotted as separate figures.
+example_genes <- c("TNFRSF9", "VEGFD", "TRIM58")
+example_genes2 <- c("AGRP", "CCK", "BMP6")
+
+plot_data <- filter(JYN_long, Assay %in% example_genes)
+plot_data2 <- filter(JYN_long, Assay %in% example_genes2)
+
+
+# Draw NPX by NS_annotation for the assays present in `df`.
+#
+# One boxplot per annotation level with the individual samples jittered on
+# top (boxplot outlier glyphs are hidden), faceted by assay with free y
+# axes. `df` needs the columns NS_annotation, NPX and Assay.
+# Returns the ggplot object.
+plot_npx_by_annotation <- function(df) {
+  ggplot(df, aes(x = NS_annotation, y = NPX, fill = NS_annotation)) +
+    geom_boxplot(outlier.shape = NA) +
+    geom_jitter(width = 0.2, alpha = 0.6, size = 1.5) +
+    facet_wrap(~Assay, scales = "free_y") +
+    scale_fill_brewer(palette = "Set2") +
+    theme_bw() +
+    labs(
+      title = "Expression of Selected Proteins by NS_annotation",
+      x = "NS_annotation",
+      y = "NPX"
+    )
+}
+
+
+plot_npx_by_annotation(plot_data)
+
+
+plot_npx_by_annotation(plot_data2)
+```
+<br/>
+<br/>
+<br/>
+<br/>
+
+
+If we run the analysis without removing the high-LOD assays, the number of assays is 1032.
+
+
+Comparing significantly differentially expressed proteins between D0 and D56, there are 173 proteins significantly downregulated at D56 and 22 proteins significantly upregulated at D56.
+
+
+```{r noLODfilter, eval=FALSE}
+# Re-run the analysis without removing the high-LOD assays.
+# Not evaluated when knitting (eval=FALSE); relies on MPXV and header_Fulldf
+# from the readin chunk, and overwrites the objects it reassigns if run.
+no_ctrl_MPXV <- MPXV %>%
+  filter(!str_detect(SampleID, regex("Control"))) # remove control samples
+
+# remove control assays
+
+
+no_ctrl_MPXV <- no_ctrl_MPXV %>%
+  filter(!Assay %in% c(
+    "Amplification control",
+    "Extension control",
+    "Incubation control"
+  )) # remove control assays!
+
+
+# remove flagged assays n=2
+
+
+warn_assays <- unique(no_ctrl_MPXV %>%
+  filter(AssayQC == "WARN") %>%
+  pull(Assay)) # two assays, "NPM1"   "SDHAF4"
+
+
+no_ctrl_MPXV <- no_ctrl_MPXV %>%
+  filter(!Assay %in% warn_assays)
+# No LOD-based filtering here: every remaining assay is kept.
+clean_MPXV <- no_ctrl_MPXV
+
+annotated_long <- clean_MPXV %>%
+  left_join(header_Fulldf, by = "SampleID") # add annotation to my long form dataframe
+
+
+JYN_long <- annotated_long %>%
+  filter(Expt == "JYN")
+
+
+JYN_long %>%
+  select(SampleID, NS_annotation) %>%
+  distinct()
+
+
+# Relevel: move the four timepoints, in chronological order, to the end of
+# the NS_annotation levels.
+
+
+JYN_long <- JYN_long %>%
+  mutate(
+    NS_annotation = fct_relevel(
+      NS_annotation,
+      "D0", "D28", "D56", "D180",
+      after = Inf
+    )
+  )
+
+
+JYN_lmer_results <- olink_lmer(
+  df       = JYN_long,
+  variable = "NS_annotation",
+  random   = "Donor"
+)
+
+
+# write.table(JYN_lmer_results, file="~/Desktop/JYN_lmer_results_noLODfilter.txt", sep="\t", col.names=NA)
+
+
+JYN_posthoc <- olink_lmer_posthoc(
+  df            = JYN_long,
+  variable      = "NS_annotation",
+  random        = "Donor",
+  effect        = "NS_annotation",
+  verbose       = TRUE
+)
+
+# write.table(JYN_posthoc, file="~/Desktop/JYN_lmer_results_posthoc_noLODfilter.txt", sep="\t", col.names=NA)
+
+
+baseline_vs_t3 <- JYN_posthoc %>%
+  filter(str_detect(contrast, "D0 - D56")) %>%
+  filter(Threshold == "Significant") %>%
+  arrange(Adjusted_pval)
+
+# estimate > 0 for "D0 - D56": reported as downregulated at D56.
+baseline_vs_t3 %>%
+  filter(estimate > 0) %>%
+  nrow() # 173
+
+baseline_vs_t3 %>%
+  filter(estimate < 0) %>%
+  nrow() # 22
+```
+
+```{r sessionInfo}
+sessionInfo() # record the R version and loaded packages for reproducibility
+```