Skip to content

01

Import

import numpy as np

Basics

np.array([1, 2, 3, 4, 5])
np.arange(1, 100, 10) ## start, stop (exclusive), step
np.linspace(1, 100, 10) ## start, stop (inclusive), no of values

## constant-filled arrays
np.zeros(10)
np.ones(10)

## random
np.random.random(10) ## uniform samples in [0, 1)
np.random.randn(10) ## samples from the standard normal distribution

Array Operations

## Element-wise
a+3
1/a

## Boolean
a > 4

Indexing

a[2]

a[2:]
a[-10:]

a[:10]
a[:-10]

a[::2] ## every 2nd element starting at index 0 (even indices)
a[1::2] ## every 2nd element starting at index 1 (odd indices)


## Masking
a[a > 4]

np.vectorize

Applies a Python function to every element, much like a for loop (a convenience, not a speed-up).

names = ["Thahir", "Azhar"]
first_letter = np.vectorize(lambda x: x[0])(names) 

Stats

np.mean(a)
np.median(a)
np.std(a)
np.quantile(a, 0.90) ## q given as a fraction in [0, 1]
np.percentile(a, 90) ## same value, q given as a percentage in [0, 100]

Calculus

## numerical calculus (for symbolic/analytic, use sympy)
dydx = np.gradient(y, x )
y_int = np.cumsum(y) * (x[1]-x[0]) ## running sum times the first x-step; assumes evenly spaced x

Multi-Dimensional

a = np.array([
  [1, 2, 3],
  [4, 5, 6]
])
a = np.random.randn(3, 3)

a.ravel() ## returns a 1d array

a[0] ## first row
a[:,0] ## first column

Mesh Grid

xv, yv = np.meshgrid(x, y)
zv = xv**2 + yv**2
plt.contourf(xv, yv, zv, levels=100)
plt.colorbar()

Linear Algebra

Matrix

  a.T
  a*b ## element-wise operator
  a@b ## matrix multiplication
  a.dot(b) ## same as a@b for 2-D arrays
  np.cross(a, b) ## cross product; ndarray has no .cross() method

Solve systems of equations

  a = np.array([
    [3, 2, 1],
    [5, -5, 4],
    [6, 0, 1]
  ])
  b = np.array([
    4,
    3,
    0
  ])

  x = np.linalg.solve(a, b) ## ax = b

Eigenvalues

  temp = np.linalg.eig(A)
  eigen_values = temp[0]
  eigen_vector = temp[1][:, 0]

Find-Replace

if

  prediction['Rating'] = np.where(
    prediction['Rating'].to_numpy() > 100,
    100,
    prediction['Rating'].to_numpy()
  )

if-else

  prediction['Rating'] = np.where(
    prediction['Rating'].to_numpy() > 100,
    100,
    0
  )

if-elseif-else

  conditions = [
    prediction['Rating'].to_numpy() > 100,
    prediction['Rating'].to_numpy() > 50,
    prediction['Rating'].to_numpy() > 20
  ]

  values = [
    100,
    50,
    20  
  ]

  default = 0

  prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
  )

nested

  rating = prediction['Rating'].to_numpy()

  ## `&` binds tighter than `>` and `==`, so every comparison must be
  ## parenthesised before combining; otherwise the expression becomes a
  ## chained comparison and raises on arrays.
  ## np.select uses the first condition that matches, so order matters.
  conditions = [
    (rating > 100) & (rating % 2 == 0),
    (rating > 100) & (rating % 3 == 0),
    (rating > 100) & (rating % 4 == 0),

    rating > 50,
    rating > 20
  ]

  values = [
    102,
    103,
    104,

    50,
    20
  ]

  default = 0

  prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
  )

Rounding

Round to Integer

  np.around(prediction)

  ## instead of
  ## prediction = ( round(element) for element in prediction )

Round to \(n\) places

  np.around(prediction, n)

Read data

data = np.loadtxt(
  "./data.csv",
  dtype = "object",
  delimiter = ",",
  unpack = True,
  skiprows = 1 
)

Save

np.savetxt(
    filename + ".csv",
  data,
  delimiter = ",",
  fmt = "%d",
  header = "Col1, Col2"
)

Cartesian

Indexing

High space complexity

import numpy as np

def cartesian(arrays, out=None):
    """
    Generate a Cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the Cartesian product of. Must contain at
        least one array.
    out : ndarray
        Array to place the Cartesian product in. Allocated when omitted.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing Cartesian products
        formed of input arrays, where M is the product of the input sizes.
        The dtype is that of the first input array. If any input is empty
        the result has zero rows.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])

    """

    arrays = [np.asarray(x) for x in arrays]
    dtype = arrays[0].dtype

    n = int(np.prod([x.size for x in arrays]))
    if out is None:
        out = np.zeros([n, len(arrays)], dtype=dtype)

    # An empty factor makes the whole product empty; returning here also
    # avoids dividing by a zero size below.
    if n == 0:
        return out

    # Number of rows that share one value of the first array. Integer
    # division keeps this exact (no float round-trip).
    m = n // arrays[0].size
    out[:, 0] = np.repeat(arrays[0], m)
    if arrays[1:]:
        # Fill the product of the remaining arrays once, in the first m rows ...
        cartesian(arrays[1:], out=out[0:m, 1:])
        # ... then copy it under every other value of the first array.
        for j in range(1, arrays[0].size):
            out[j * m:(j + 1) * m, 1:] = out[0:m, 1:]
    return out

Pairwise Mutual Information Matrix

from joblib import Parallel, delayed

class PairwiseMutualInformation():
    """
    Pairwise mutual information between the columns of a DataFrame.

    Every pair of columns is discretised with a 2-D histogram. Joint
    entropies H(i, j) are computed from those histograms, the diagonal
    H(i, i) is used as the marginal entropy H(i), and the result is
    ``MI(i, j) = H(i) + H(j) - H(i, j)``.

    Parameters
    ----------
    normalized : bool
        If True, return ``2 * MI(i, j) / (H(i) + H(j))`` instead of MI.
    n_bins : int or None
        Histogram bins per variable. When None, ``fit`` uses
        ``int(sqrt(n_samples / 5))``, but never fewer than 1.
    sample : int, float or None
        Row subsampling applied in ``fit``: an int is a number of rows, a
        float is a fraction of rows. Any other value disables subsampling.
    random_state
        Forwarded to ``DataFrame.sample``.
    n_jobs : int or None
        Forwarded to ``joblib.Parallel``; None is treated as 1.

    Attributes set by ``fit``
    -------------------------
    columns_, n_variables, n_samples, n_bins_, mi_matrix_
    """

    def __init__(self, normalized=True, n_bins=None, sample=None, random_state=None, n_jobs=None, ):
        self.n_bins = n_bins
        self.sample = sample
        self.normalized = normalized
        self.random_state = random_state
        self.n_jobs = n_jobs if n_jobs is not None else 1

    def compute_histogram2d(self, i, j, X, n_bins):
        """Return the 2-D histogram counts of columns i and j of array X."""
        return np.histogram2d(X[:, i], X[:, j], bins=n_bins)[0]

    def joint_entropies(self, X):
        """Return the (n_variables, n_variables) matrix of joint entropies in bits."""
        n_vars = self.n_variables
        n_bins = self.n_bins_

        results = Parallel(n_jobs=self.n_jobs)(
            delayed(self.compute_histogram2d)(i, j, X, n_bins)
            for i in range(n_vars)
            for j in range(n_vars)
        )

        # Results arrive in row-major (i, j) order, so a reshape places
        # each histogram at [i, j].
        histograms2d = np.asarray(results, dtype=float).reshape(
            n_vars, n_vars, n_bins, n_bins
        )

        # The tiny offset keeps log2 finite for empty bins.
        probs = histograms2d / len(X) + 1e-100
        return -(probs * np.log2(probs)).sum((2, 3))

    def get_mutual_info_matrix(self, X):
        """Return the (optionally normalized) mutual information matrix for array X."""
        j_entropies = self.joint_entropies(X)
        # H(i, i) is the entropy of variable i on its own.
        entropies = j_entropies.diagonal()
        entropies_tile = np.tile(entropies, (self.n_variables, 1))
        sum_entropies = entropies_tile + entropies_tile.T

        mi_matrix = sum_entropies - j_entropies
        if self.normalized:
            mi_matrix = mi_matrix * 2 / sum_entropies
        return mi_matrix

    def _subsample(self, X):
        """Return the rows of DataFrame X selected by ``self.sample``, or X itself."""
        # bool is a subclass of int; never treat True/False as a row count.
        if isinstance(self.sample, bool):
            return X
        if isinstance(self.sample, int):
            return X.sample(n=self.sample, random_state=self.random_state)
        if isinstance(self.sample, float):
            return X.sample(frac=self.sample, random_state=self.random_state)
        return X

    def fit(self, X, y=None):
        """
        Compute the mutual information matrix of DataFrame X.

        ``y`` is ignored. Returns ``self``.
        """
        self.columns_ = X.columns

        # Sample from the data that was passed in, not from a global frame.
        X = self._subsample(X).to_numpy()

        self.n_variables = X.shape[-1]
        self.n_samples = X.shape[0]

        # Resolve the bin count into a fitted attribute so the ``n_bins``
        # hyper-parameter is not overwritten and a refit on different data
        # recomputes it.
        if self.n_bins is None:
            self.n_bins_ = max(1, int((self.n_samples / 5) ** .5))
        else:
            self.n_bins_ = self.n_bins

        self.mi_matrix_ = self.get_mutual_info_matrix(X)
        return self

    def transform(self, X, y=None):
        """Return the fitted matrix as a DataFrame labelled by the fitted columns."""
        return pd.DataFrame(self.mi_matrix_, index=self.columns_, columns=self.columns_)

    def fit_transform(self, X, y=None):
        """Fit on X and return the resulting matrix as a DataFrame."""
        return self.fit(X, y).transform(X, y)
# Example: fit on `df` with sample=0.10 (a float, i.e. a row fraction) and
# n_jobs=-1 passed through to joblib; the result is a DataFrame indexed by
# the column names on both axes.
matrix_similarity = PairwiseMutualInformation(normalized=True, n_jobs=-1, sample=0.10, random_state=0).fit_transform(df)
Last Updated: 2024-12-26 ; Contributors: AhmedThahir, web-flow

Comments