Large textural dataset analysis - Skuggafjoll¶

In [1]:

Copied!





#Load in the data from the segmentation results
import numpy as np
import matplotlib.pyplot as plt
import piecewise_regression
samples = ["HOR11_01A", "HOR11_01B", "HOR11_01C", "HOR11_02A", "HOR11_02B", "HOR12_03A", "HOR12_03B", "SKU11_02A" ,
            "SKU11_02B", "SKU11_03", "SKU12_05", "SKU12_06A", "SKU12_06B", "SKU12_07","SKU12_09A", "SKU12_09B","SKU12_09C", "SKU12_14B", "SKU12_15B",
             "SKU12_15A", "SKU12_15B", "SKU12_15C", "SKU12_16","SKU12_21",  "SKU12_22","SKU12_25", "SKU12_26"]


centres = []
tex_counts = []
tex_centres = []
area_sqrt = []
aspect_ratio = []
aspect_ratio_sigma = []
for item in samples:
    tex = np.load("data/data_geo_" + str(item) + ".npz")
    counts = tex["counts"]
    counts_x = tex["centres"]
    ar = tex["arb"]
    ars = tex["ars"]
    asqrt = tex["ar_centres"]
    tex_counts.append(counts)
    tex_centres.append(counts_x)
    aspect_ratio.append(ar)
    area_sqrt.append(asqrt)
    aspect_ratio_sigma.append(ars)
#Load in the data from the segmentation results
import numpy as np
import matplotlib.pyplot as plt
import piecewise_regression
samples = ["HOR11_01A", "HOR11_01B", "HOR11_01C", "HOR11_02A", "HOR11_02B", "HOR12_03A", "HOR12_03B", "SKU11_02A" ,
            "SKU11_02B", "SKU11_03", "SKU12_05", "SKU12_06A", "SKU12_06B", "SKU12_07","SKU12_09A", "SKU12_09B","SKU12_09C", "SKU12_14B", "SKU12_15B",
             "SKU12_15A", "SKU12_15B", "SKU12_15C", "SKU12_16","SKU12_21",  "SKU12_22","SKU12_25", "SKU12_26"]


centres = []
tex_counts = []
tex_centres = []
area_sqrt = []
aspect_ratio = []
aspect_ratio_sigma = []
for item in samples:
    tex = np.load("data/data_geo_" + str(item) + ".npz")
    counts = tex["counts"]
    counts_x = tex["centres"]
    ar = tex["arb"]
    ars = tex["ars"]
    asqrt = tex["ar_centres"]
    tex_counts.append(counts)
    tex_centres.append(counts_x)
    aspect_ratio.append(ar)
    area_sqrt.append(asqrt)
    aspect_ratio_sigma.append(ars)

In [2]:

Copied!





#calculate bin widths

def get_bw(centres, min_size = 0):
    bw = []
    for i in range(len(centres)):
        if i == 0:
            bw.append(centres[i]-min_size)
        else:
            bw.append(centres[i]-centres[i-1])

    return np.asarray(bw)

bw_list = []
for item in tex_centres:
    bw_list.append(get_bw(item, min_size = 100))
#calculate bin widths

def get_bw(centres, min_size = 0):
    bw = []
    for i in range(len(centres)):
        if i == 0:
            bw.append(centres[i]-min_size)
        else:
            bw.append(centres[i]-centres[i-1])

    return np.asarray(bw)

bw_list = []
for item in tex_centres:
    bw_list.append(get_bw(item, min_size = 100))

In [3]:

Copied!





#piecewise regression fits - 2 samples don't have any breakpoints so a custom function was written to bypass it

def no_bp_fit(x_plot, y_plot):
    ms = piecewise_regression.ModelSelection(x_plot, y_plot, max_breakpoints = 1)
    const = ms.model_summaries[0]["estimates"]["const"]
    alpha = ms.model_summaries[0]["estimates"]["alpha1"]
    return const, alpha

bp = []
alpha1 = []
alpha2 = []
se = []
alpha = []
consts = []
max_x = []
min_x = []
pw_fit_list = []


for i in range(len(samples)):
    
    y_plot = np.log(np.divide(tex_counts[i]/0.15, bw_list[i]*0.0001))
    x_plot = tex_centres[i]


    if samples[i] == "SKU12_05":
        c, a = no_bp_fit(x_plot, y_plot)
        alpha.append(a)
        consts.append(c)
        max_x.append(np.max(x_plot))
        min_x.append(np.min(x_plot))
    elif samples[i] == "SKU12_25":
        c, a = no_bp_fit(x_plot, y_plot)
        alpha.append(a)
        consts.append(c)
        max_x.append(np.max(x_plot))
        min_x.append(np.min(x_plot))
    else:
    
        pw_fit = piecewise_regression.Fit(x_plot, y_plot, n_breakpoints=1)
        res = pw_fit.get_results()["estimates"]
        bp.append(res["breakpoint1"]["estimate"])
        alpha1.append(res["alpha1"]["estimate"])
        alpha2.append(res["alpha2"]["estimate"])
        se.append([res["breakpoint1"]["se"],res["alpha1"]["se"],res["alpha2"]["se"]])
        pw_fit_list.append(pw_fit)

        
        # Plot the data, fit, breakpoints and confidence intervals
        #pw_fit.plot_data(color="grey", s=20)
        # Pass in standard matplotlib keywords to control any of the plots
        #pw_fit.plot_fit(color="red", linewidth=2)
        #plt.xlabel("x")
        #plt.ylabel("y")
bp = np.asarray(bp)
se = np.asarray(se)
alpha1=np.asarray(alpha1)
alpha2 = np.asarray(alpha2)
#piecewise regression fits - 2 samples don't have any breakpoints so a custom function was written to bypass it

def no_bp_fit(x_plot, y_plot):
    ms = piecewise_regression.ModelSelection(x_plot, y_plot, max_breakpoints = 1)
    const = ms.model_summaries[0]["estimates"]["const"]
    alpha = ms.model_summaries[0]["estimates"]["alpha1"]
    return const, alpha

bp = []
alpha1 = []
alpha2 = []
se = []
alpha = []
consts = []
max_x = []
min_x = []
pw_fit_list = []


for i in range(len(samples)):
    
    y_plot = np.log(np.divide(tex_counts[i]/0.15, bw_list[i]*0.0001))
    x_plot = tex_centres[i]


    if samples[i] == "SKU12_05":
        c, a = no_bp_fit(x_plot, y_plot)
        alpha.append(a)
        consts.append(c)
        max_x.append(np.max(x_plot))
        min_x.append(np.min(x_plot))
    elif samples[i] == "SKU12_25":
        c, a = no_bp_fit(x_plot, y_plot)
        alpha.append(a)
        consts.append(c)
        max_x.append(np.max(x_plot))
        min_x.append(np.min(x_plot))
    else:
    
        pw_fit = piecewise_regression.Fit(x_plot, y_plot, n_breakpoints=1)
        res = pw_fit.get_results()["estimates"]
        bp.append(res["breakpoint1"]["estimate"])
        alpha1.append(res["alpha1"]["estimate"])
        alpha2.append(res["alpha2"]["estimate"])
        se.append([res["breakpoint1"]["se"],res["alpha1"]["se"],res["alpha2"]["se"]])
        pw_fit_list.append(pw_fit)

        
        # Plot the data, fit, breakpoints and confidence intervals
        #pw_fit.plot_data(color="grey", s=20)
        # Pass in standard matplotlib keywords to control any of the plots
        #pw_fit.plot_fit(color="red", linewidth=2)
        #plt.xlabel("x")
        #plt.ylabel("y")
bp = np.asarray(bp)
se = np.asarray(se)
alpha1=np.asarray(alpha1)
alpha2 = np.asarray(alpha2)

Running fit with n_breakpoint = 0 . . 
Running fit with n_breakpoint = 1 . . 

                 Breakpoint Model Comparision Results                 
====================================================================================================
n_breakpoints            BIC    converged          RSS 
----------------------------------------------------------------------------------------------------
0                    -19.266         True      0.42799 
1                                   False              

Min BIC (Bayesian Information Criterion) suggests best model
Running fit with n_breakpoint = 0 . . 
Running fit with n_breakpoint = 1 . . 

                 Breakpoint Model Comparision Results                 
====================================================================================================
n_breakpoints            BIC    converged          RSS 
----------------------------------------------------------------------------------------------------
0                    -18.204         True      0.48875 
1                    -15.579         True      0.40346 

Min BIC (Bayesian Information Criterion) suggests best model

Visualize results¶

In [4]:

Copied!





def mc_average(means, sigmas, n = 100000):
    m = np.random.randint(0, len(means), n)
    samples = np.random.normal(loc = means[m], scale = sigmas[m])
    return np.mean(samples), np.std(samples)
def mc_average(means, sigmas, n = 100000):
    m = np.random.randint(0, len(means), n)
    samples = np.random.normal(loc = means[m], scale = sigmas[m])
    return np.mean(samples), np.std(samples)

In [5]:

Copied!





import matplotlib.gridspec as gridspec
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
fig, ax = plt.subplots(2,2,figsize=(7.5, 5))
ax1 = ax[0,0]
ax2 = ax[0,1]
ax3 = ax[1,0]
ax[1,1].axis("off")

scatter_kwargs = {"zorder":100}
error_kwargs = {"lw":1, "zorder":0}
ax1.errorbar(alpha1, bp, xerr = se[:,1], yerr = se[:,0], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax1.scatter(alpha1, bp,c = 'tab:blue' , **scatter_kwargs)
ax1.plot(alpha, [0,0], 'rD')
ax1.set_ylim([220, 550])

ax2.errorbar(alpha2, bp, xerr = se[:,2], yerr = se[:,0], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax2.scatter(alpha2, bp,c = 'tab:blue' , **scatter_kwargs)

ax3.errorbar(alpha1, alpha2, xerr = se[:,1], yerr = se[:,2], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax3.scatter(alpha1, alpha2,c = 'tab:blue' , **scatter_kwargs)
ax3.plot(alpha, [0,0], 'rD')

ax1.xaxis.set_major_locator(MultipleLocator(0.004))
ax2.xaxis.set_major_locator(MultipleLocator(0.001))
ax3.xaxis.set_major_locator(MultipleLocator(0.004))

ax1.set_ylabel(r"Breakpoint ($\mu$m)")
ax2.set_ylabel(r"Breakpoint ($\mu$m)")
ax3.set_ylabel(r"$\alpha_2$")

ax1.set_xlabel(r"$\alpha_1$")
ax2.set_xlabel(r"$\alpha_2$")
ax3.set_xlabel(r"$\alpha_1$")

import matplotlib.lines as mlines

red_diamond = mlines.Line2D([], [], color='red', marker='D', linestyle='None',
                          markersize=5, label='Non-kinked CSDs')
dot = mlines.Line2D([], [], color='cadetblue', marker='o', linestyle='None',
                          markersize=5, label='Kinked CSDs')

ax[1,1].legend(handles=[red_diamond, dot], loc = "upper left")
ax[1,1].text(0.17, 0.55, r"Averages:")
ax[1,1].text(0.03, 0.45, r"Breakpoint: 399 $\pm$ 64")
ax[1,1].text(0.03, 0.35, r"$\alpha_1$: -1.7$\times10^{-2}$ $\pm$ 4.0$\times10^{-3}$")
ax[1,1].text(0.03, 0.25, r"$\alpha_2$: -2.2$\times10^{-3}$ $\pm$ 6.6$\times10^{-4}$")

plt.tight_layout()
import matplotlib.gridspec as gridspec
from matplotlib.ticker import (AutoMinorLocator, MultipleLocator)
fig, ax = plt.subplots(2,2,figsize=(7.5, 5))
ax1 = ax[0,0]
ax2 = ax[0,1]
ax3 = ax[1,0]
ax[1,1].axis("off")

scatter_kwargs = {"zorder":100}
error_kwargs = {"lw":1, "zorder":0}
ax1.errorbar(alpha1, bp, xerr = se[:,1], yerr = se[:,0], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax1.scatter(alpha1, bp,c = 'tab:blue' , **scatter_kwargs)
ax1.plot(alpha, [0,0], 'rD')
ax1.set_ylim([220, 550])

ax2.errorbar(alpha2, bp, xerr = se[:,2], yerr = se[:,0], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax2.scatter(alpha2, bp,c = 'tab:blue' , **scatter_kwargs)

ax3.errorbar(alpha1, alpha2, xerr = se[:,1], yerr = se[:,2], fmt = 'none', alpha = 0.5, c = 'k' , **error_kwargs)
ax3.scatter(alpha1, alpha2,c = 'tab:blue' , **scatter_kwargs)
ax3.plot(alpha, [0,0], 'rD')

ax1.xaxis.set_major_locator(MultipleLocator(0.004))
ax2.xaxis.set_major_locator(MultipleLocator(0.001))
ax3.xaxis.set_major_locator(MultipleLocator(0.004))

ax1.set_ylabel(r"Breakpoint ($\mu$m)")
ax2.set_ylabel(r"Breakpoint ($\mu$m)")
ax3.set_ylabel(r"$\alpha_2$")

ax1.set_xlabel(r"$\alpha_1$")
ax2.set_xlabel(r"$\alpha_2$")
ax3.set_xlabel(r"$\alpha_1$")

import matplotlib.lines as mlines

red_diamond = mlines.Line2D([], [], color='red', marker='D', linestyle='None',
                          markersize=5, label='Non-kinked CSDs')
dot = mlines.Line2D([], [], color='cadetblue', marker='o', linestyle='None',
                          markersize=5, label='Kinked CSDs')

ax[1,1].legend(handles=[red_diamond, dot], loc = "upper left")
ax[1,1].text(0.17, 0.55, r"Averages:")
ax[1,1].text(0.03, 0.45, r"Breakpoint: 399 $\pm$ 64")
ax[1,1].text(0.03, 0.35, r"$\alpha_1$: -1.7$\times10^{-2}$ $\pm$ 4.0$\times10^{-3}$")
ax[1,1].text(0.03, 0.25, r"$\alpha_2$: -2.2$\times10^{-3}$ $\pm$ 6.6$\times10^{-4}$")

plt.tight_layout()

No description has been provided for this image

In [6]:

Copied!





fig = plt.figure(figsize=(7.5, 5))



for pw in pw_fit_list:
    pw.plot_fit(color = "tab:blue", linewidth = 2)

for i in range(len(alpha)):
    a = alpha[i]
    c = consts[i]
    max_val = max_x[i]
    min_val = min_x[i]
    x = np.linspace(min_val, max_val, 20)
    y = c + x*a
    plt.plot(x, y, 'r', alpha = 0.8, linewidth = 2)

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color="cadetblue", lw=3),
                Line2D([0], [0], color="r", lw=3)]
plt.legend(custom_lines, ["Kinked CSDs", "Non-kinked CSDs"], loc = "upper center")
plt.xlabel(r"Area$^{0.5}$ ($\mu m$)", size = 14)
plt.ylabel("$ln$(population density)", size = 14)
plt.text(1400, 7.5, r"- Geometric binning strategy", size = 10)
plt.text(1400, 6.5, r"- Piecewise linear regression used for kinked CSDs", size = 10)
plt.text(1400, 5.5, r"- Simple linear regression used for non-kinked CSDs", size = 10)
fig.tight_layout()
fig = plt.figure(figsize=(7.5, 5))



for pw in pw_fit_list:
    pw.plot_fit(color = "tab:blue", linewidth = 2)

for i in range(len(alpha)):
    a = alpha[i]
    c = consts[i]
    max_val = max_x[i]
    min_val = min_x[i]
    x = np.linspace(min_val, max_val, 20)
    y = c + x*a
    plt.plot(x, y, 'r', alpha = 0.8, linewidth = 2)

from matplotlib.lines import Line2D
custom_lines = [Line2D([0], [0], color="cadetblue", lw=3),
                Line2D([0], [0], color="r", lw=3)]
plt.legend(custom_lines, ["Kinked CSDs", "Non-kinked CSDs"], loc = "upper center")
plt.xlabel(r"Area$^{0.5}$ ($\mu m$)", size = 14)
plt.ylabel("$ln$(population density)", size = 14)
plt.text(1400, 7.5, r"- Geometric binning strategy", size = 10)
plt.text(1400, 6.5, r"- Piecewise linear regression used for kinked CSDs", size = 10)
plt.text(1400, 5.5, r"- Simple linear regression used for non-kinked CSDs", size = 10)
fig.tight_layout()

In [7]:

Copied!





max_len = 0
index = 0
for i in range(len(aspect_ratio)):
    if len(aspect_ratio[i]) > max_len:
        max_len = len(aspect_ratio[i])
        index = i
    else:
        pass

means = []
stds = []
binned_ar = []
binned_ar_sigma = []
for j in range(max_len):
    temp = []
    temp_sigma = []
    for i in range(len(aspect_ratio)):
        item = aspect_ratio[i]
        if len(item) <= j:
            pass
        else:
            temp.append(item[j])
            temp_sigma.append(aspect_ratio_sigma[j])
    means.append(np.mean(temp))
    stds.append(np.std(temp))
    binned_ar.append(temp)
    binned_ar_sigma.append(temp_sigma)
max_len = 0
index = 0
for i in range(len(aspect_ratio)):
    if len(aspect_ratio[i]) > max_len:
        max_len = len(aspect_ratio[i])
        index = i
    else:
        pass

means = []
stds = []
binned_ar = []
binned_ar_sigma = []
for j in range(max_len):
    temp = []
    temp_sigma = []
    for i in range(len(aspect_ratio)):
        item = aspect_ratio[i]
        if len(item) <= j:
            pass
        else:
            temp.append(item[j])
            temp_sigma.append(aspect_ratio_sigma[j])
    means.append(np.mean(temp))
    stds.append(np.std(temp))
    binned_ar.append(temp)
    binned_ar_sigma.append(temp_sigma)

In [8]:

Copied!

plt.errorbar(tex_centres[index][:max_len], means, yerr=stds, fmt = "o")
plt.errorbar(tex_centres[index][:max_len], means, yerr=stds, fmt = "o")

Out[8]:

<ErrorbarContainer object of 3 artists>

Use MCMC fit from Sill calibration¶

In [9]:

Copied!

import bilby
result = bilby.result.read_in_result(filename='mcmc2/unknown_x_result.json')
import bilby
result = bilby.result.read_in_result(filename='mcmc2/unknown_x_result.json')

In [10]:

Copied!





m = result.posterior["m"].to_numpy()
c = result.posterior["c"].to_numpy()
m_best = np.mean(m)
c_best = np.mean(c)
m = result.posterior["m"].to_numpy()
c = result.posterior["c"].to_numpy()
m_best = np.mean(m)
c_best = np.mean(c)

In [11]:

Copied!

best_fit = np.asarray(means)*m_best + c_best
best_fit = np.asarray(means)*m_best + c_best

In [12]:

Copied!

realisations = np.asarray(means)*m.reshape(-1,1) + c.reshape(-1,1)
realisations = np.asarray(means)*m.reshape(-1,1) + c.reshape(-1,1)

In [13]:

Copied!





binned_mean = []
binned_std = []
all_realisations = []
for item in binned_ar:
    best_fit = np.asarray(item)*m_best + c_best
    realisations = np.asarray(item)*m.reshape(-1,1) + c.reshape(-1,1)
    binned_mean.append(np.mean(best_fit))
    binned_std.append(np.std(realisations))
binned_mean = []
binned_std = []
all_realisations = []
for item in binned_ar:
    best_fit = np.asarray(item)*m_best + c_best
    realisations = np.asarray(item)*m.reshape(-1,1) + c.reshape(-1,1)
    binned_mean.append(np.mean(best_fit))
    binned_std.append(np.std(realisations))

In [14]:

Copied!





fig, ax = plt.subplots(1,1,figsize=(6.5, 5))

error_kwargs = {"zorder":10}
l_kwargs = {"zorder":0}
ax.set_ylabel("Apparent Mean Aspect Ratio", color = 'k', size = 14)

axt = ax.twinx()
axt.plot(tex_centres[index][:max_len], 10**np.asarray(binned_mean), 'o',**scatter_kwargs)
axt.errorbar(tex_centres[index][:max_len], 10**np.asarray(binned_mean), yerr =[10**np.asarray(binned_mean) - 10**(np.asarray(binned_mean)-np.asarray(binned_std)),
                                                                               10**(np.asarray(binned_mean)+np.asarray(binned_std)) - 10**np.asarray(binned_mean)],
                                                                                 fmt = 'none', alpha = 0.5, **error_kwargs)
axt.set_ylabel("Calculated cooling time (years)", color = "k", size = 14)
ax.set_xlabel(r"Area$^{0.5}$ ($\mu m$)", size = 14)
axt.set_yscale("log")
axt.hlines(10**4,-10, 3100, 'r', linestyle = '-.', alpha = 0.75 , **l_kwargs)
axt.text(2700, 10**4, '10 kyrs', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(10**1,-10, 3100, 'r', linestyle = '-.' , alpha = 0.75 , **l_kwargs)
axt.text(2700, 10**1, '1 year', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(1/365.25,-10, 3100, 'r', linestyle = '-.' , alpha = 0.75, **l_kwargs )
axt.text(2700, 1/365.25, '1 day', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(1/(365.25*24*3600),-10, 3100, 'r', linestyle = '-.' , alpha = 0.75, **l_kwargs )
axt.text(2700, 1/(365.25*24*3600), '1 sec', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
ax.set_xlim([-10, 3100])
ax.set_ylim([1.3,4.8])
axt.set_ylim([10**(1.3*m_best + c_best), 10**(4.8*m_best+c_best)])
fig.tight_layout()
fig.savefig("aspect_ratio_timescales_skug.png", dpi =500)
fig, ax = plt.subplots(1,1,figsize=(6.5, 5))

error_kwargs = {"zorder":10}
l_kwargs = {"zorder":0}
ax.set_ylabel("Apparent Mean Aspect Ratio", color = 'k', size = 14)

axt = ax.twinx()
axt.plot(tex_centres[index][:max_len], 10**np.asarray(binned_mean), 'o',**scatter_kwargs)
axt.errorbar(tex_centres[index][:max_len], 10**np.asarray(binned_mean), yerr =[10**np.asarray(binned_mean) - 10**(np.asarray(binned_mean)-np.asarray(binned_std)),
                                                                               10**(np.asarray(binned_mean)+np.asarray(binned_std)) - 10**np.asarray(binned_mean)],
                                                                                 fmt = 'none', alpha = 0.5, **error_kwargs)
axt.set_ylabel("Calculated cooling time (years)", color = "k", size = 14)
ax.set_xlabel(r"Area$^{0.5}$ ($\mu m$)", size = 14)
axt.set_yscale("log")
axt.hlines(10**4,-10, 3100, 'r', linestyle = '-.', alpha = 0.75 , **l_kwargs)
axt.text(2700, 10**4, '10 kyrs', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(10**1,-10, 3100, 'r', linestyle = '-.' , alpha = 0.75 , **l_kwargs)
axt.text(2700, 10**1, '1 year', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(1/365.25,-10, 3100, 'r', linestyle = '-.' , alpha = 0.75, **l_kwargs )
axt.text(2700, 1/365.25, '1 day', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
axt.hlines(1/(365.25*24*3600),-10, 3100, 'r', linestyle = '-.' , alpha = 0.75, **l_kwargs )
axt.text(2700, 1/(365.25*24*3600), '1 sec', fontsize=10, va='center', ha='center', backgroundcolor='w', c = 'r')
ax.set_xlim([-10, 3100])
ax.set_ylim([1.3,4.8])
axt.set_ylim([10**(1.3*m_best + c_best), 10**(4.8*m_best+c_best)])
fig.tight_layout()
fig.savefig("aspect_ratio_timescales_skug.png", dpi =500)

In [ ]: