# First, let's import all the needed libraries.

import numpy as np
import matplotlib.pyplot as plt
import math
import scipy.stats as stats
import random
import pandas as pd


# Headline numbers of the exam example. Presumably n0/p0 are the number of
# trials and the success probability and x_1..x_3 the outcomes queried below;
# the cells that follow declare their own n, p and k instead of reading these.
n0, p0 = 25, 0.3
x_1, x_2, x_3 = 3, 15, 9


# P(X = 3): probability that exactly 3 of the 25 students pass, evaluated
# directly from the binomial formula C(n, k) * p^k * (1 - p)^(n - k).
n = 25  # trials: one per student
p = 0.3  # chance that a single student passes
k = 3  # number of passes being asked about

math.comb(n, k) * pow(p, k) * pow(1 - p, n - k)

# Recorded result: 0.02427998871170032


# P(X = 15): probability that exactly 15 of the 25 students pass, again
# evaluated directly from C(n, k) * p^k * (1 - p)^(n - k).
n = 25  # trials: one per student
p = 0.3  # chance that a single student passes
k = 15  # number of passes being asked about

math.comb(n, k) * pow(p, k) * pow(1 - p, n - k)

# Recorded result: 0.0013248974242351928


# P(X <= 9): probability that at most 9 of the 25 students pass, obtained by
# adding up the binomial formula for every pass count from 0 through 9.
n = 25  # number of students
p = 0.3  # probability of success

# A generator over the pass counts replaces ten hand-copied terms, which were
# easy to get wrong when editing (each term repeats its index three times).
prob_at_most_9 = sum(
    math.comb(n, passes) * p**passes * (1 - p) ** (n - passes)
    for passes in range(9 + 1)  # 0, 1, ..., 9 inclusive
)
prob_at_most_9

# Recorded result of the original term-by-term sum: 0.8105639764950532


# The same two point probabilities, P(X = 3) and P(X = 15), now taken from
# SciPy's binomial PMF instead of the hand-written formula.
n = 25  # trials: one per student
p = 0.3  # chance that a single student passes
k = [3, 15]  # pass counts to evaluate

tuple(stats.binom.pmf(passes, n, p) for passes in k)

# Recorded result: (0.024279988711700378, 0.0013248974242351943)


# P(X <= 9) as the sum of the point probabilities returned by SciPy's
# binomial PMF for every pass count from 0 through 9.
n = 25  # number of students
p = 0.3  # probability of success
# "At most 9" includes the outcome where nobody passes, so the range starts at
# 0. Starting at 1 drops P(X = 0) = 0.7**25 (about 0.000134) and yields
# 0.81043 instead of the 0.81056 given by the manual sum and by binom.cdf.
k = np.arange(0, 10)

stats.binom.pmf(k, n, p)  # P(X = 0), P(X = 1), ..., P(X = 9)


np.sum(stats.binom.pmf(k, n, p))  # about 0.8105639765

# The previously recorded outputs were removed: they belonged to the truncated
# 1..9 range, and the bare `array([...])` line raised NameError when this file
# was run as a script.


# P(X <= 9) in a single call: the binomial CDF evaluated at k.
n = 25  # trials: one per student
p = 0.3  # chance that a single student passes
k = 9  # upper bound on the number of passes

stats.binom.cdf(k, n, p)

# Recorded result: 0.8105639764950546


# P(X > 9) via the complement rule: one minus the binomial CDF evaluated at k.
n = 25  # trials: one per student
p = 0.3  # chance that a single student passes
k = 9  # threshold; the result covers 10 or more passes

1 - stats.binom.cdf(k, n, p)

# Recorded result: 0.18943602350494537


# Simulate the exam many times: each draw is the number of students, out of n,
# who pass when every student passes independently with probability p.
n = 25  # number of students
p = 0.3  # probability of success
size = 100000  # number of random samples


# Kept so the stdlib generator is left in the same state as before. It does
# NOT influence SciPy, which draws from NumPy's random state.
random.seed(3)
# random_state is what actually makes this draw reproducible.
random_binom_numbers = stats.binom.rvs(n, p, size=size, random_state=3)


# Histogram of the simulated pass counts, normalised so that each bar's height
# is the share of simulations that produced that count.
lowest = random_binom_numbers.min()
highest = random_binom_numbers.max()

plt.figure(figsize=(10, 5))
plt.hist(
    random_binom_numbers,
    density=True,  # density=False would plot raw counts
    # One unit-wide bin centred on every integer from lowest to highest.
    # Passing only a bin *count* splits [lowest, highest] into equal slices
    # that are not exactly 1 wide, which shifts the bars off the integer
    # ticks and scales the density away from the per-value probability.
    bins=np.arange(lowest, highest + 2) - 0.5,
    color="#fc9d12",
    edgecolor="grey",
)
plt.xlabel("Students passing the final exam")
plt.ylabel("Probability")
plt.title("Binomial Probability Distribution \nfor size=25 and p=0.3")
plt.xticks(np.arange(lowest, highest + 1, 2.0))  # a tick on every second count


plt.show()


# Simulate the exam many times and keep the pass counts in a pandas Series so
# the pandas plotting helpers can be used on them.
n = 25  # number of students
p = 0.3  # probability of success
size = 100000  # number of random samples
# Kept so the stdlib generator is left in the same state as before. It does
# NOT influence SciPy, which draws from NumPy's random state.
random.seed(3)
# random_state is what actually makes this draw reproducible.
random_binom_numbers = pd.Series(stats.binom.rvs(n, p, size=size, random_state=3))

# Histogram of the simulated pass counts with the four probabilities computed
# earlier marked on it: P(X <= 9), P(X > 9), P(X = 3) and P(X = 15).
plt.figure(figsize=(10, 5))

# NOTE(review): an integer bin count splits [min, max] into equal-width slices,
# which are generally not exactly 1 wide; the annotation coordinates below
# appear to have been placed by eye against that layout.
distinct_outcomes = len(np.unique(random_binom_numbers))
ax = random_binom_numbers.plot.hist(
    density=True,
    bins=distinct_outcomes,
    figsize=(10, 5),
    edgecolor="grey",
)


# Fill the bars whose centre lies left of 9.5 (the "at most 9" side) and leave
# the remaining bars white; every bar gets a black outline.
for bar in ax.containers[0]:
    x = bar.get_x() + bar.get_width() / 2  # horizontal centre of the bar
    bar.set_color("#fc4d62" if x < 9.5 else "white")
    bar.set_edgecolor("black")

plt.xlabel("Students passing the final exam")
plt.ylabel("Probability")
plt.title("Binomial Probability Distribution \nfor size=25 and p=0.3")
plt.xticks(
    np.arange(random_binom_numbers.min(), random_binom_numbers.max() + 1, 2.0)
)

plt.xlim([0, n])
plt.ylim([0, 0.25])

# Dashed marker at 9, the threshold separating P(X <= 9) from P(X > 9).
plt.axvline(x=9, color="red", linestyle="dashed")

# Two arrow looks: flat arrows spanning a region, short arrows pointing down
# at a single bar.
horizontal_arrow = {
    "length_includes_head": False,
    "head_width": 0.006,
    "head_length": 2,
    "color": "red",
}
vertical_arrow = {
    "length_includes_head": True,
    "head_width": 0.4,
    "head_length": 0.02,
    "color": "red",
}

# (x, y, dx, dy) of the arrow, its look, the label position, the label text.
annotations = [
    ((8.5, 0.19, -5.5, 0), horizontal_arrow, (4, 0.21), r"$P(X \leq 9$)"),
    ((9.5, 0.19, 5.5, 0), horizontal_arrow, (11, 0.21), r"$P(X > 9$)"),
    ((3.5, 0.1, 0, -0.07), vertical_arrow, (2, 0.11), r"$P(X =3$)"),
    ((14.5, 0.07, 0, -0.06), vertical_arrow, (13.5, 0.08), r"$P(X =15$)"),
]
for arrow_geometry, arrow_style, label_position, label in annotations:
    plt.arrow(*arrow_geometry, **arrow_style)
    plt.text(*label_position, label, fontsize=14, color="red")

plt.show()


# Parameters and simulated pass counts for the cumulative-distribution plot.
n = 25  # number of students
p = 0.3  # probability of success
size = 100000  # number of random samples
# Kept so the stdlib generator is left in the same state as before. It does
# NOT influence SciPy, which draws from NumPy's random state.
random.seed(3)
# random_state is what actually makes this draw reproducible.
random_binom_numbers = pd.Series(stats.binom.rvs(n, p, size=size, random_state=3))

# Cumulative distribution of the number of passing students, with the region
# up to 9 passes shaded and P(X <= 9) marked on both axes.
# NOTE(review): "Cummulative" in the labels below is a misspelling of
# "Cumulative"; the strings are left as they were.
support = np.arange(0, n + 1)  # every possible pass count, 0 and n included
cdf_values = stats.binom.cdf(support, n, p)
threshold = 9
cdf_at_threshold = stats.binom.cdf(threshold, n, p)  # about 0.81

plt.figure(figsize=(10, 5))
plt.plot(support, cdf_values, linewidth=3, color="black")

plt.xlabel("Students passing the final exam")
plt.ylabel("Cummulative Probability")
plt.title("Cummulative Binomial Probability Distribution \nfor size=25 and p=0.3")

# The curve is deterministic, so its ticks are derived from n rather than from
# the extremes of a random sample.
plt.xticks(np.arange(0, n + 1, 2.0))


plt.fill_between(
    x=support,
    y1=cdf_values,
    where=(support <= threshold),
    color="red",
    alpha=0.5,
)

plt.text(3, 0.85, r"$P(X \leq 9$) =0.81", fontsize=14, color="black")
plt.text(6, 0.21, r"$P(X \leq 9$)", fontsize=14, color="black")

# Guide lines meeting on the curve at (9, P(X <= 9)). vlines takes its limits
# in data units; axvline's ymax is a fraction of the axes height, so 0.8 there
# did not correspond to a probability of 0.8.
plt.vlines(
    x=threshold, ymin=0, ymax=cdf_at_threshold, colors="black", linestyles="dashed"
)
plt.hlines(
    y=cdf_at_threshold, xmin=0, xmax=threshold, color="black", linestyle="dashed"
)
plt.show()


def _plot_binomial_hist_with_cdf(n, p, size):
    """Simulate Binomial(n, p) and plot its histogram with the CDF overlaid.

    The histogram of the simulated draws goes on the left y-axis and the exact
    cumulative distribution function on a second (right) y-axis. The figure is
    shown before the function returns.

    Args:
        n: Number of trials per draw.
        p: Probability of success on each trial.
        size: Number of random draws to simulate.

    Returns:
        A tuple ``(samples, fig, ax, ax2)``: the simulated draws as a pandas
        Series, the figure, the histogram axes and the twin CDF axes.
    """
    samples = pd.Series(stats.binom.rvs(n, p, size=size))

    fig, ax = plt.subplots(figsize=(10, 5))
    plt.title(f"size = {n}, p = {p}", fontsize=16)

    ax.hist(
        samples,
        # One unit-wide bin centred on each observed integer, so that a bar's
        # height is the share of draws equal to that value.
        bins=np.arange(samples.min(), samples.max() + 2) - 0.5,
        # The axis is labelled "Probability"; without normalisation the bars
        # showed raw counts.
        density=True,
        color="white",
        edgecolor="black",
    )

    ax.set_xlabel("Size", fontsize=14)
    ax.set_ylabel("Probability", fontsize=14)

    # Second y-axis sharing the same x-axis, used for the cumulative curve.
    ax2 = ax.twinx()
    support = np.arange(0, n + 1)  # every possible outcome, 0 and n included
    ax2.plot(support, stats.binom.cdf(support, n, p), linewidth=3, color="black")

    ax2.set_ylabel("Cummulative probability", fontsize=14)

    plt.show()
    return samples, fig, ax, ax2


# Same number of trials and draws for three success probabilities, showing how
# the shape of the distribution changes with p.
n = 25
size = 100000  # number of random samples

for p in (0.1, 0.5, 0.9):  # probability of success
    # Rebinding these names each time leaves the module with the same
    # variables the three separate cells used to define.
    random_binom_numbers, fig, ax, ax2 = _plot_binomial_hist_with_cdf(n, p, size)

# The Binomial Probability Formula