# functions for correcting for non-linearities in the OD, for
# the fluorescence of the media, and for autofluorescence
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import gaussianprocessderivatives as gp
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.optimize import minimize_scalar
import pandas as pd
import om_code.sunder as sunder
import om_code.omerrors as errors
import om_code.omgenutils as gu
[docs]def findODcorrection(wdirpath, ODfname, exp, figs, odmatch):
"""
Uses a Gaussian process to fit serial dilution data
to correct for non-linearities in the relationship between OD and cell
density. The data are either loaded from file ODfname or the default
data for haploid yeast growing in glucose are used (collected by
L Bandiera).
"""
print("Fitting dilution data for OD correction for non-linearities")
if ODfname:
try:
od, dilfac = np.loadtxt(str(wdirpath / ODfname), unpack=True)
print("Using", ODfname)
except (FileNotFoundError, OSError):
raise errors.FileNotFound(str(wdirpath / ODfname))
else:
print("Using default data for haploid yeast in glucose")
# Lucia's data
od = np.array(
[
1.38192778,
1.15388333,
0.96450556,
0.78569444,
0.61685,
0.45291666,
0.33923888,
0.24501666,
0.18033889,
0.12463889,
0.08463889,
0.05783889,
0.04019444,
0.01776111,
0.01790556,
0.66888333,
0.47343889,
0.35310556,
0.25619444,
0.18245,
0.19291667,
0.12428333,
0.08719444,
0.05910555,
0.04203889,
0.02910556,
0.02035,
0.01392778,
0.00946111,
0.00651666,
1.06491665,
0.85690557,
0.71552777,
0.52646112,
0.39376111,
0.28846111,
0.20293889,
0.14296111,
0.10538333,
0.07017222,
0.04939444,
0.03397222,
0.02639445,
0.01780556,
0.01463889,
0.34130555,
0.21385,
0.15140556,
0.10612778,
0.07330556,
0.05147222,
0.03702778,
0.02599445,
0.01739444,
0.01188333,
0.00956111,
0.00572778,
0.00491667,
0.00275,
0.00316111,
]
)
dilfac = np.array(
[
1.0000000e00,
7.1968567e-01,
5.1794747e-01,
3.7275937e-01,
2.6826958e-01,
1.9306977e-01,
1.3894955e-01,
1.0000000e-01,
7.1968570e-02,
5.1794750e-02,
3.7275940e-02,
2.6826960e-02,
1.9306980e-02,
1.3894950e-02,
1.0000000e-02,
3.0484230e-01,
2.1939064e-01,
1.5789230e-01,
1.1363283e-01,
8.1779920e-02,
5.8855830e-02,
4.2357700e-02,
3.0484230e-02,
2.1939060e-02,
1.5789230e-02,
1.1363280e-02,
8.1779900e-03,
5.8855800e-03,
4.2357700e-03,
3.0484200e-03,
4.4830728e-01,
3.2264033e-01,
2.3219962e-01,
1.6711074e-01,
1.2026721e-01,
8.6554590e-02,
6.2292100e-02,
4.4830730e-02,
3.2264030e-02,
2.3219960e-02,
1.6711070e-02,
1.2026720e-02,
8.6554600e-03,
6.2292100e-03,
4.4830700e-03,
7.7885310e-02,
5.6052940e-02,
4.0340500e-02,
2.9032480e-02,
2.0894260e-02,
1.5037300e-02,
1.0822130e-02,
7.7885300e-03,
5.6052900e-03,
4.0340500e-03,
2.9032500e-03,
2.0894300e-03,
1.5037300e-03,
1.0822100e-03,
7.7885000e-04,
]
)
# process data
dilfac = dilfac[np.argsort(od)]
od = np.sort(od)
if odmatch:
# rescale so that OD and dilfac match at a particular OD
# compares better with Warringer & Blomberg, Yeast 2003,
# and rescaled OD is larger
dilfacmatch = interp1d(od, dilfac)(odmatch)
y = dilfac / dilfacmatch * odmatch
else:
y = dilfac
# run Gaussian process
gc = gp.sqexplinGP(
{0: (-4, 2), 1: (-3, 1), 2: (-6, 1), 3: (-6, 1)}, od, y
)
gc.findhyperparameters(noruns=5, exitearly=True, quiet=True)
gc.predict(od)
if figs:
plt.figure()
gc.sketch(".")
plt.grid(True)
plt.xlabel("OD")
plt.ylabel("corrected OD (relative cell numbers)")
if ODfname:
plt.title("Fitting " + ODfname)
else:
plt.title("for haploid budding yeast in glucose")
plt.show()
return gc
###
###
[docs]def correctauto1(
self,
f,
refstrain,
figs,
experiments,
experimentincludes,
experimentexcludes,
conditions,
conditionincludes,
conditionexcludes,
strains,
strainincludes,
strainexcludes,
):
"""
Corrects for autofluorescence for experiments with measured
emissions at one wavelength using the fluorescence of the reference strain
interpolated to the OD of the tagged strain. This in principle corrects too
for the fluorescence of the medium, although running correctmedia is still
preferred.
"""
print("Correcting autofluorescence using", f[0])
for e in sunder.getexps(
self, experiments, experimentincludes, experimentexcludes
):
for c in sunder.getcons(
self,
conditions,
conditionincludes,
conditionexcludes,
nomedia=True,
):
# process reference strain
refstrfn = processref1(self, f, refstrain, figs, e, c)
# correct strains
for s in sunder.getstrs(
self, strains, strainincludes, strainexcludes, nonull=True
):
if not self.sc[
(self.sc.experiment == e)
& (self.sc.condition == c)
& (self.sc.strain == s)
][f[0] + " corrected for autofluorescence"].any():
od, rawfl = sunder.extractwells(
self.r, self.s, e, c, s, ["OD", f[0]]
)
# no data
if od.size == 0 or rawfl.size == 0:
continue
# correct autofluorescence for each replicate
fl = np.transpose(
[
rawfl[:, i] - refstrfn(od[:, i])
for i in range(od.shape[1])
]
)
flperod = np.transpose(
[
(rawfl[:, i] - refstrfn(od[:, i])) / od[:, i]
for i in range(od.shape[1])
]
)
# replace negative values with NaNs
fl[fl < 0] = np.nan
flperod[flperod < 0] = np.nan
nonans = np.count_nonzero(np.isnan(fl))
if np.any(nonans):
print("Warning -", e + ":", s, "in", c, "\n",
nonans, "corrected data points are"
" NaN because the corrected fluorescence"
" was negative")
if nonans == fl.size:
print("Warning -", e + ":", s, "in", c, "\n",
"Corrected fluorescence is all NaN")
# store results
bname = "c-" + f[0]
autofdict = {
"experiment": e,
"condition": c,
"strain": s,
"time": self.s.query(
"experiment == @e and condition == @c "
"and strain == @s"
)["time"].to_numpy(),
bname: np.nanmean(fl, 1),
bname + " err": nanstdzeros2nan(fl, 1),
bname + "perOD": np.nanmean(flperod, 1),
bname + "perOD err": nanstdzeros2nan(flperod, 1),
}
autofdf = pd.DataFrame(autofdict)
if bname not in self.s.columns:
# extend dataframe
self.s = pd.merge(self.s, autofdf, how="outer")
else:
# update dataframe
self.s = gu.absorbdf(
self.s,
autofdf,
["experiment", "condition", "strain", "time"],
)
# record that correction has occurred
self.sc.loc[
(self.sc.experiment == e)
& (self.sc.condition == c)
& (self.sc.strain == s),
f[0] + " corrected for autofluorescence",
] = True
###
[docs]def processref1(self, f, refstrain, figs, experiment, condition):
"""
Processes reference strain for data with one fluorescence
measurement. Uses lowess to smooth the fluorescence of the reference
strain as a function of OD.
Parameters
----------
f: string
The fluorescence to be corrected. For example, ['mCherry'].
refstrain: string
The reference strain. For example, 'WT'.
figs: boolean
If True, display fits of the reference strain's fluorescence.
experiment: string
The experiment to be corrected.
condition: string
The condition to be corrected.
Returns
-------
refstrfn: function
The reference strain's fluorescence as a function of OD.
"""
e, c = experiment, condition
print(
e + ": Processing reference strain",
refstrain,
"for",
f[0],
"in",
c,
)
od, fl = sunder.extractwells(self.r, self.s, e, c, refstrain, ["OD", f[0]])
if od.size == 0 or fl.size == 0:
raise errors.CorrectAuto(e + ": " + refstrain + " not found in " + c)
else:
odf = od.flatten("F")
flf = fl.flatten("F")
# smooth fluorescence as a function of OD using lowess to minimize
# refstrain's autofluorescence
def choosefrac(frac):
res = lowess(flf, odf, frac=frac)
refstrfn = interp1d(
res[:, 0],
res[:, 1],
fill_value=(res[0, 1], res[-1, 1]),
bounds_error=False,
)
# max gives smoother fits than mean
return np.max(np.abs(flf - refstrfn(odf)))
res = minimize_scalar(choosefrac, bounds=(0.1, 0.99), method="bounded")
# choose the optimum frac
frac = res.x if res.success else 0.33
res = lowess(flf, odf, frac=frac)
refstrfn = interp1d(
res[:, 0],
res[:, 1],
fill_value=(res[0, 1], res[-1, 1]),
bounds_error=False,
)
if figs:
# plot fit
plt.figure()
plt.plot(odf, flf, ".", alpha=0.5)
plt.plot(res[:, 0], res[:, 1])
plt.xlabel("OD")
plt.ylabel(f[0])
plt.title(e + ": " + refstrain + " for " + c)
plt.show()
return refstrfn
###
[docs]def correctauto2(
self,
f,
refstrain,
figs,
experiments,
experimentincludes,
experimentexcludes,
conditions,
conditionincludes,
conditionexcludes,
strains,
strainincludes,
strainexcludes,
):
"""
Corrects for autofluorescence using spectral unmixing
for experiments with measured emissions at two wavelengths.
References
----------
CA Lichten, R White, IB Clark, PS Swain (2014). Unmixing of fluorescence
spectra to resolve quantitative time-series measurements of gene
expression in plate readers.
BMC Biotech, 14, 1-11.
"""
# correct for autofluorescence
print("Correcting autofluorescence using", f[0], "and", f[1])
for e in sunder.getexps(
self, experiments, experimentincludes, experimentexcludes
):
for c in sunder.getcons(
self,
conditions,
conditionincludes,
conditionexcludes,
nomedia=True,
):
# process reference strain
refqrfn = processref2(self, f, refstrain, figs, e, c)
# process other strains
for s in sunder.getstrs(
self, strains, strainincludes, strainexcludes, nonull=True
):
if s != refstrain and not (
self.sc[
(self.sc.experiment == e)
& (self.sc.condition == c)
& (self.sc.strain == s)
][f[0] + " corrected for autofluorescence"].any()
):
f0, f1 = sunder.extractwells(self.r, self.s, e, c, s, f)
if f0.size == 0 or f1.size == 0:
continue
nodata, nr = f0.shape
# set negative values to NaNs
f0[f0 < 0] = np.nan
f1[f1 < 0] = np.nan
# use mean OD for correction
odmean = self.s.query(
"experiment == @e and condition == @c and strain == @s"
)["OD mean"].to_numpy()
# remove autofluorescence
ra = refqrfn(odmean)
fl = applyautoflcorrection(self, ra, f0, f1)
od = sunder.extractwells(self.r, self.s, e, c, s, "OD")
flperod = fl / od
# set negative values to NaNs
fl[fl < 0] = np.nan
flperod[flperod < 0] = np.nan
# store corrected fluorescence
bname = "c-" + f[0]
autofdict = {
"experiment": e,
"condition": c,
"strain": s,
"time": self.s.query(
"experiment == @e and condition == @c"
" and strain == @s"
)["time"].to_numpy(),
bname: np.nanmean(fl, 1),
bname + " err": nanstdzeros2nan(fl, 1),
bname + "perOD": np.nanmean(flperod, 1),
bname + "perOD err": nanstdzeros2nan(flperod, 1),
}
# add to dataframe
self.s = gu.absorbdf(
self.s,
pd.DataFrame(autofdict),
["experiment", "condition", "strain", "time"],
)
self.sc.loc[
(self.sc.experiment == e)
& (self.sc.condition == c)
& (self.sc.strain == s),
f[0] + " corrected for autofluorescence",
] = True
###
[docs]def processref2(self, f, refstrain, figs, experiment, condition):
"""
Processes reference strain data for spectral unmixing
(for experiments with two fluorescence measurements). Uses lowess to
smooth the ratio of emitted fluorescence measurements so that the
reference strain's data is corrected to zero as best as possible.
Parameters
----------
f: list of strings
The fluorescence measurements. For example, ['GFP', 'AutoFL'].
refstrain: string
The reference strain. For example, 'WT'.
figs: boolean
If True, display fits of the fluorescence ratios.
experiment: string
The experiment to be corrected.
condition: string
The condition to be corrected.
Returns
-------
qrfn: function
The ratio of the two fluorescences for the reference strain as a
function of OD.
"""
e, c = experiment, condition
print(
e + ": Processing reference strain",
refstrain,
"for",
f[0],
"in",
c,
)
# refstrain data
f0, f1, od = sunder.extractwells(
self.r, self.s, e, c, refstrain, f + ["OD"]
)
if f0.size == 0 or f1.size == 0 or od.size == 0:
raise errors.CorrectAuto(e + ": " + refstrain + " not found in " + c)
else:
f0[f0 < 0] = np.nan
f1[f1 < 0] = np.nan
odf = od.flatten("F")
odrefmean = np.mean(od, 1)
qrf = (f1 / f0).flatten("F")
if np.all(np.isnan(qrf)):
raise errors.CorrectAuto(
e + ": " + refstrain + " in " + c + " has too many NaNs"
)
# smooth to minimize autofluorescence in refstrain
def choosefrac(frac):
res = lowess(qrf, odf, frac)
qrfn = interp1d(
res[:, 0],
res[:, 1],
fill_value=(res[0, 1], res[-1, 1]),
bounds_error=False,
)
flref = applyautoflcorrection(self, qrfn(odrefmean), f0, f1)
return np.max(np.abs(flref))
res = minimize_scalar(choosefrac, bounds=(0.1, 0.99), method="bounded")
# calculate the relationship between qr and OD
frac = res.x if res.success else 0.95
res = lowess(qrf, odf, frac)
qrfn = interp1d(
res[:, 0],
res[:, 1],
fill_value=(res[0, 1], res[-1, 1]),
bounds_error=False,
)
if figs:
plt.figure()
plt.plot(odf, qrf, ".", alpha=0.5)
plt.plot(res[:, 0], res[:, 1])
plt.xlabel("OD")
plt.ylabel(f[1] + "/" + f[0])
plt.title(e + ": " + refstrain + " in " + c)
plt.show()
# check autofluorescence correction for reference strain
flref = applyautoflcorrection(self, qrfn(odrefmean), f0, f1)
flrefperod = flref / od
# set negative values to NaNs
flref[flref < 0] = np.nan
flrefperod[flrefperod < 0] = np.nan
# store results
bname = "c-" + f[0]
autofdict = {
"experiment": e,
"condition": c,
"strain": refstrain,
"time": self.s.query(
"experiment == @e and condition == @c and strain == @refstrain"
)["time"].to_numpy(),
bname: np.nanmean(flref, 1),
bname + "perOD": np.nanmean(flrefperod, 1),
bname + " err": nanstdzeros2nan(flref, 1),
bname + "perOD err": nanstdzeros2nan(flrefperod, 1),
}
if bname not in self.s.columns:
self.s = pd.merge(self.s, pd.DataFrame(autofdict), how="outer")
else:
self.s = gu.absorbdf(
self.s,
pd.DataFrame(autofdict),
["experiment", "condition", "strain", "time"],
)
return qrfn
###
[docs]def applyautoflcorrection(self, ra, f0data, f1data):
"""
Corrects for autofluorescence returning an array of replicates.
"""
nr = f0data.shape[1]
raa = np.reshape(np.tile(ra, nr), (np.size(ra), nr), order="F")
return (raa * f0data - f1data) / (
raa - self._gamma * np.ones(np.shape(raa))
)
###
[docs]def nanstdzeros2nan(a, axis=None):
"""
nanstd but setting zeros to nan
"""
err = np.nanstd(a, axis)
err[err == 0] = np.nan
return err