Source code for om_code.corrections

# functions for correcting for non-linearities in the OD, for
# the fluorescence of the media, and for autofluorescence
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import gaussianprocessderivatives as gp
from statsmodels.nonparametric.smoothers_lowess import lowess
from scipy.optimize import minimize_scalar
import pandas as pd
import om_code.sunder as sunder
import om_code.omerrors as errors
import om_code.omgenutils as gu


[docs]def findODcorrection(wdirpath, ODfname, exp, figs, odmatch):
    """
    Uses a Gaussian process to fit serial dilution data
    to correct for non-linearities in the relationship between OD and cell
    density. The data are either loaded from file ODfname or the default
    data for haploid yeast growing in glucose are used (collected by
    L Bandiera).
    """
    print("Fitting dilution data for OD correction for non-linearities")
    if ODfname:
        try:
            od, dilfac = np.loadtxt(str(wdirpath / ODfname), unpack=True)
            print("Using", ODfname)
        except (FileNotFoundError, OSError):
            raise errors.FileNotFound(str(wdirpath / ODfname))
    else:
        print("Using default data for haploid yeast in glucose")
        # Lucia's data
        od = np.array(
            [
                1.38192778,
                1.15388333,
                0.96450556,
                0.78569444,
                0.61685,
                0.45291666,
                0.33923888,
                0.24501666,
                0.18033889,
                0.12463889,
                0.08463889,
                0.05783889,
                0.04019444,
                0.01776111,
                0.01790556,
                0.66888333,
                0.47343889,
                0.35310556,
                0.25619444,
                0.18245,
                0.19291667,
                0.12428333,
                0.08719444,
                0.05910555,
                0.04203889,
                0.02910556,
                0.02035,
                0.01392778,
                0.00946111,
                0.00651666,
                1.06491665,
                0.85690557,
                0.71552777,
                0.52646112,
                0.39376111,
                0.28846111,
                0.20293889,
                0.14296111,
                0.10538333,
                0.07017222,
                0.04939444,
                0.03397222,
                0.02639445,
                0.01780556,
                0.01463889,
                0.34130555,
                0.21385,
                0.15140556,
                0.10612778,
                0.07330556,
                0.05147222,
                0.03702778,
                0.02599445,
                0.01739444,
                0.01188333,
                0.00956111,
                0.00572778,
                0.00491667,
                0.00275,
                0.00316111,
            ]
        )
        dilfac = np.array(
            [
                1.0000000e00,
                7.1968567e-01,
                5.1794747e-01,
                3.7275937e-01,
                2.6826958e-01,
                1.9306977e-01,
                1.3894955e-01,
                1.0000000e-01,
                7.1968570e-02,
                5.1794750e-02,
                3.7275940e-02,
                2.6826960e-02,
                1.9306980e-02,
                1.3894950e-02,
                1.0000000e-02,
                3.0484230e-01,
                2.1939064e-01,
                1.5789230e-01,
                1.1363283e-01,
                8.1779920e-02,
                5.8855830e-02,
                4.2357700e-02,
                3.0484230e-02,
                2.1939060e-02,
                1.5789230e-02,
                1.1363280e-02,
                8.1779900e-03,
                5.8855800e-03,
                4.2357700e-03,
                3.0484200e-03,
                4.4830728e-01,
                3.2264033e-01,
                2.3219962e-01,
                1.6711074e-01,
                1.2026721e-01,
                8.6554590e-02,
                6.2292100e-02,
                4.4830730e-02,
                3.2264030e-02,
                2.3219960e-02,
                1.6711070e-02,
                1.2026720e-02,
                8.6554600e-03,
                6.2292100e-03,
                4.4830700e-03,
                7.7885310e-02,
                5.6052940e-02,
                4.0340500e-02,
                2.9032480e-02,
                2.0894260e-02,
                1.5037300e-02,
                1.0822130e-02,
                7.7885300e-03,
                5.6052900e-03,
                4.0340500e-03,
                2.9032500e-03,
                2.0894300e-03,
                1.5037300e-03,
                1.0822100e-03,
                7.7885000e-04,
            ]
        )
        # process data
        dilfac = dilfac[np.argsort(od)]
        od = np.sort(od)
        if odmatch:
            # rescale so that OD and dilfac match at a particular OD
            # compares better with Warringer & Blomberg, Yeast 2003,
            # and rescaled OD is larger
            dilfacmatch = interp1d(od, dilfac)(odmatch)
            y = dilfac / dilfacmatch * odmatch
        else:
            y = dilfac
        # run Gaussian process
        gc = gp.sqexplinGP(
            {0: (-4, 2), 1: (-3, 1), 2: (-6, 1), 3: (-6, 1)}, od, y
        )
        gc.findhyperparameters(noruns=5, exitearly=True, quiet=True)
        gc.predict(od)
        if figs:
            plt.figure()
            gc.sketch(".")
            plt.grid(True)
            plt.xlabel("OD")
            plt.ylabel("corrected OD (relative cell numbers)")
            if ODfname:
                plt.title("Fitting " + ODfname)
            else:
                plt.title("for haploid budding yeast in glucose")
            plt.show()
        return gc


###


[docs]def performmediacorrection(
    r_df, dtype, exp, condition, figs, commonmedia, frac
):
    """
    Uses lowess to smooth the media data from Null
    wells over time and subtracts the smoothed values from the data.
    """
    # find data for correction with condition equal to commonmedia
    df = r_df.query(
        "experiment == @exp and condition == @commonmedia"
        " and strain == 'Null'"
    )
    if df.empty:
        # no data
        print(
            ' No well annotated "Null" was found for',
            commonmedia,
            "in experiment",
            exp,
        )
        print(
            " Correcting for media for", dtype, "in", commonmedia, "abandoned!"
        )
        return False, None
    else:
        # there is data - change r dataframe
        rtest = (r_df.experiment == exp) & (r_df.condition == condition)
        t, data = df["time"].to_numpy(), df[dtype].to_numpy()
        # find correction
        res = lowess(data, t, frac=frac)
        correctionfn = interp1d(
            res[:, 0],
            res[:, 1],
            fill_value=(res[0, 1], res[-1, 1]),
            bounds_error=False,
        )
        if figs:
            plt.figure()
            plt.plot(t, data, "ro", res[:, 0], res[:, 1], "b-")
            plt.xlabel("time (hours)")
            plt.title(
                exp + ": media correction for " + dtype + " in " + condition
            )
            plt.show()
        # perform correction
        r_df.loc[rtest, dtype] = r_df[rtest][dtype] - correctionfn(
            r_df[rtest]["time"]
        )
        # check for any negative values
        negvalues = ""
        for s in np.unique(r_df[rtest]["strain"][r_df[rtest][dtype] < 0]):
            if s != "Null":
                wstr = (
                    "\t"
                    + dtype
                    + ": "
                    + s
                    + " in "
                    + condition
                    + " for wells "
                )
                for well in np.unique(
                    r_df[rtest][r_df[rtest].strain == s]["well"][
                        r_df[rtest][dtype] < 0
                    ]
                ):
                    wstr += well + " "
                wstr += "\n"
                negvalues += wstr
        return True, negvalues


###


[docs]def correctauto1(
    self,
    f,
    refstrain,
    figs,
    experiments,
    experimentincludes,
    experimentexcludes,
    conditions,
    conditionincludes,
    conditionexcludes,
    strains,
    strainincludes,
    strainexcludes,
):
    """
    Corrects for autofluorescence for experiments with measured
    emissions at one wavelength using the fluorescence of the reference strain
    interpolated to the OD of the tagged strain. This in principle corrects too
    for the fluorescence of the medium, although running correctmedia is still
    preferred.
    """
    print("Correcting autofluorescence using", f[0])
    for e in sunder.getexps(
        self, experiments, experimentincludes, experimentexcludes
    ):
        for c in sunder.getcons(
            self,
            conditions,
            conditionincludes,
            conditionexcludes,
            nomedia=True,
        ):
            # process reference strain
            refstrfn = processref1(self, f, refstrain, figs, e, c)
            # correct strains
            for s in sunder.getstrs(
                self, strains, strainincludes, strainexcludes, nonull=True
            ):
                if not self.sc[
                    (self.sc.experiment == e)
                    & (self.sc.condition == c)
                    & (self.sc.strain == s)
                ][f[0] + " corrected for autofluorescence"].any():
                    od, rawfl = sunder.extractwells(
                        self.r, self.s, e, c, s, ["OD", f[0]]
                    )
                    # no data
                    if od.size == 0 or rawfl.size == 0:
                        continue
                    # correct autofluorescence for each replicate
                    fl = np.transpose(
                        [
                            rawfl[:, i] - refstrfn(od[:, i])
                            for i in range(od.shape[1])
                        ]
                    )
                    flperod = np.transpose(
                        [
                            (rawfl[:, i] - refstrfn(od[:, i])) / od[:, i]
                            for i in range(od.shape[1])
                        ]
                    )
                    # replace negative values with NaNs
                    fl[fl < 0] = np.nan
                    flperod[flperod < 0] = np.nan
                    nonans = np.count_nonzero(np.isnan(fl))
                    if np.any(nonans):
                        print("Warning -", e + ":", s, "in", c, "\n",
                              nonans, "corrected data points are"
                              " NaN because the corrected fluorescence"
                              " was negative")
                        if nonans == fl.size:
                            print("Warning -", e + ":", s, "in", c, "\n",
                                  "Corrected fluorescence is all NaN")
                    # store results
                    bname = "c-" + f[0]
                    autofdict = {
                        "experiment": e,
                        "condition": c,
                        "strain": s,
                        "time": self.s.query(
                            "experiment == @e and condition == @c "
                            "and strain == @s"
                        )["time"].to_numpy(),
                        bname: np.nanmean(fl, 1),
                        bname + " err": nanstdzeros2nan(fl, 1),
                        bname + "perOD": np.nanmean(flperod, 1),
                        bname + "perOD err": nanstdzeros2nan(flperod, 1),
                    }
                    autofdf = pd.DataFrame(autofdict)
                    if bname not in self.s.columns:
                        # extend dataframe
                        self.s = pd.merge(self.s, autofdf, how="outer")
                    else:
                        # update dataframe
                        self.s = gu.absorbdf(
                            self.s,
                            autofdf,
                            ["experiment", "condition", "strain", "time"],
                        )
                    # record that correction has occurred
                    self.sc.loc[
                        (self.sc.experiment == e)
                        & (self.sc.condition == c)
                        & (self.sc.strain == s),
                        f[0] + " corrected for autofluorescence",
                    ] = True


###


[docs]def processref1(self, f, refstrain, figs, experiment, condition):
    """
    Processes reference strain for data with one fluorescence
    measurement. Uses lowess to smooth the fluorescence of the reference
    strain as a function of OD.

    Parameters
    ----------
    f: string
        The fluorescence to be corrected. For example, ['mCherry'].
    refstrain: string
        The reference strain. For example, 'WT'.
    figs: boolean
        If True, display fits of the reference strain's fluorescence.
    experiment: string
        The experiment to be corrected.
    condition: string
        The condition to be corrected.

    Returns
    -------
    refstrfn: function
        The reference strain's fluorescence as a function of OD.
    """
    e, c = experiment, condition
    print(
        e + ": Processing reference strain",
        refstrain,
        "for",
        f[0],
        "in",
        c,
    )
    od, fl = sunder.extractwells(self.r, self.s, e, c, refstrain, ["OD", f[0]])
    if od.size == 0 or fl.size == 0:
        raise errors.CorrectAuto(e + ": " + refstrain + " not found in " + c)
    else:
        odf = od.flatten("F")
        flf = fl.flatten("F")
        # smooth fluorescence as a function of OD using lowess to minimize
        # refstrain's autofluorescence

        def choosefrac(frac):
            res = lowess(flf, odf, frac=frac)
            refstrfn = interp1d(
                res[:, 0],
                res[:, 1],
                fill_value=(res[0, 1], res[-1, 1]),
                bounds_error=False,
            )
            # max gives smoother fits than mean
            return np.max(np.abs(flf - refstrfn(odf)))

        res = minimize_scalar(choosefrac, bounds=(0.1, 0.99), method="bounded")
        # choose the optimum frac
        frac = res.x if res.success else 0.33
        res = lowess(flf, odf, frac=frac)
        refstrfn = interp1d(
            res[:, 0],
            res[:, 1],
            fill_value=(res[0, 1], res[-1, 1]),
            bounds_error=False,
        )
        if figs:
            # plot fit
            plt.figure()
            plt.plot(odf, flf, ".", alpha=0.5)
            plt.plot(res[:, 0], res[:, 1])
            plt.xlabel("OD")
            plt.ylabel(f[0])
            plt.title(e + ": " + refstrain + " for " + c)
            plt.show()
        return refstrfn


###


[docs]def correctauto2(
    self,
    f,
    refstrain,
    figs,
    experiments,
    experimentincludes,
    experimentexcludes,
    conditions,
    conditionincludes,
    conditionexcludes,
    strains,
    strainincludes,
    strainexcludes,
):
    """
    Corrects for autofluorescence using spectral unmixing
    for experiments with measured emissions at two wavelengths.

    References
    ----------
    CA Lichten, R White, IB Clark, PS Swain (2014). Unmixing of fluorescence
    spectra to resolve quantitative time-series measurements of gene
    expression in plate readers.
    BMC Biotech, 14, 1-11.
    """
    # correct for autofluorescence
    print("Correcting autofluorescence using", f[0], "and", f[1])
    for e in sunder.getexps(
        self, experiments, experimentincludes, experimentexcludes
    ):
        for c in sunder.getcons(
            self,
            conditions,
            conditionincludes,
            conditionexcludes,
            nomedia=True,
        ):
            # process reference strain
            refqrfn = processref2(self, f, refstrain, figs, e, c)
            # process other strains
            for s in sunder.getstrs(
                self, strains, strainincludes, strainexcludes, nonull=True
            ):
                if s != refstrain and not (
                    self.sc[
                        (self.sc.experiment == e)
                        & (self.sc.condition == c)
                        & (self.sc.strain == s)
                    ][f[0] + " corrected for autofluorescence"].any()
                ):
                    f0, f1 = sunder.extractwells(self.r, self.s, e, c, s, f)
                    if f0.size == 0 or f1.size == 0:
                        continue
                    nodata, nr = f0.shape
                    # set negative values to NaNs
                    f0[f0 < 0] = np.nan
                    f1[f1 < 0] = np.nan
                    # use mean OD for correction
                    odmean = self.s.query(
                        "experiment == @e and condition == @c and strain == @s"
                    )["OD mean"].to_numpy()
                    # remove autofluorescence
                    ra = refqrfn(odmean)
                    fl = applyautoflcorrection(self, ra, f0, f1)
                    od = sunder.extractwells(self.r, self.s, e, c, s, "OD")
                    flperod = fl / od
                    # set negative values to NaNs
                    fl[fl < 0] = np.nan
                    flperod[flperod < 0] = np.nan
                    # store corrected fluorescence
                    bname = "c-" + f[0]
                    autofdict = {
                        "experiment": e,
                        "condition": c,
                        "strain": s,
                        "time": self.s.query(
                            "experiment == @e and condition == @c"
                            " and strain == @s"
                        )["time"].to_numpy(),
                        bname: np.nanmean(fl, 1),
                        bname + " err": nanstdzeros2nan(fl, 1),
                        bname + "perOD": np.nanmean(flperod, 1),
                        bname + "perOD err": nanstdzeros2nan(flperod, 1),
                    }
                    # add to dataframe
                    self.s = gu.absorbdf(
                        self.s,
                        pd.DataFrame(autofdict),
                        ["experiment", "condition", "strain", "time"],
                    )
                    self.sc.loc[
                        (self.sc.experiment == e)
                        & (self.sc.condition == c)
                        & (self.sc.strain == s),
                        f[0] + " corrected for autofluorescence",
                    ] = True


###


[docs]def processref2(self, f, refstrain, figs, experiment, condition):
    """
    Processes reference strain data for spectral unmixing
    (for experiments with two fluorescence measurements). Uses lowess to
    smooth the ratio of emitted fluorescence measurements so that the
    reference strain's data is corrected to zero as best as possible.

    Parameters
    ----------
    f: list of strings
        The fluorescence measurements. For example, ['GFP', 'AutoFL'].
    refstrain: string
        The reference strain. For example, 'WT'.
    figs: boolean
        If True, display fits of the fluorescence ratios.
    experiment: string
        The experiment to be corrected.
    condition: string
        The condition to be corrected.

    Returns
    -------
    qrfn: function
        The ratio of the two fluorescences for the reference strain as a
        function of OD.
    """
    e, c = experiment, condition
    print(
        e + ": Processing reference strain",
        refstrain,
        "for",
        f[0],
        "in",
        c,
    )
    # refstrain data
    f0, f1, od = sunder.extractwells(
        self.r, self.s, e, c, refstrain, f + ["OD"]
    )
    if f0.size == 0 or f1.size == 0 or od.size == 0:
        raise errors.CorrectAuto(e + ": " + refstrain + " not found in " + c)
    else:
        f0[f0 < 0] = np.nan
        f1[f1 < 0] = np.nan
        odf = od.flatten("F")
        odrefmean = np.mean(od, 1)
        qrf = (f1 / f0).flatten("F")
        if np.all(np.isnan(qrf)):
            raise errors.CorrectAuto(
                e + ": " + refstrain + " in " + c + " has too many NaNs"
            )
        # smooth to minimize autofluorescence in refstrain

        def choosefrac(frac):
            res = lowess(qrf, odf, frac)
            qrfn = interp1d(
                res[:, 0],
                res[:, 1],
                fill_value=(res[0, 1], res[-1, 1]),
                bounds_error=False,
            )
            flref = applyautoflcorrection(self, qrfn(odrefmean), f0, f1)
            return np.max(np.abs(flref))

        res = minimize_scalar(choosefrac, bounds=(0.1, 0.99), method="bounded")
        # calculate the relationship between qr and OD
        frac = res.x if res.success else 0.95
        res = lowess(qrf, odf, frac)
        qrfn = interp1d(
            res[:, 0],
            res[:, 1],
            fill_value=(res[0, 1], res[-1, 1]),
            bounds_error=False,
        )
        if figs:
            plt.figure()
            plt.plot(odf, qrf, ".", alpha=0.5)
            plt.plot(res[:, 0], res[:, 1])
            plt.xlabel("OD")
            plt.ylabel(f[1] + "/" + f[0])
            plt.title(e + ": " + refstrain + " in " + c)
            plt.show()
        # check autofluorescence correction for reference strain
        flref = applyautoflcorrection(self, qrfn(odrefmean), f0, f1)
        flrefperod = flref / od
        # set negative values to NaNs
        flref[flref < 0] = np.nan
        flrefperod[flrefperod < 0] = np.nan
        # store results
        bname = "c-" + f[0]
        autofdict = {
            "experiment": e,
            "condition": c,
            "strain": refstrain,
            "time": self.s.query(
                "experiment == @e and condition == @c and strain == @refstrain"
            )["time"].to_numpy(),
            bname: np.nanmean(flref, 1),
            bname + "perOD": np.nanmean(flrefperod, 1),
            bname + " err": nanstdzeros2nan(flref, 1),
            bname + "perOD err": nanstdzeros2nan(flrefperod, 1),
        }
        if bname not in self.s.columns:
            self.s = pd.merge(self.s, pd.DataFrame(autofdict), how="outer")
        else:
            self.s = gu.absorbdf(
                self.s,
                pd.DataFrame(autofdict),
                ["experiment", "condition", "strain", "time"],
            )
        return qrfn


###


[docs]def applyautoflcorrection(self, ra, f0data, f1data):
    """
    Corrects for autofluorescence returning an array of replicates.
    """
    nr = f0data.shape[1]
    raa = np.reshape(np.tile(ra, nr), (np.size(ra), nr), order="F")
    return (raa * f0data - f1data) / (
        raa - self._gamma * np.ones(np.shape(raa))
    )


###


[docs]def nanstdzeros2nan(a, axis=None):
    """
    nanstd but setting zeros to nan
    """
    err = np.nanstd(a, axis)
    err[err == 0] = np.nan
    return err
omniplate 0.4.92 documentation

Source code for om_code.corrections