01
Import¶
Basics¶
## 1-D array from a Python list
np.array([1, 2, 3, 4, 5])
np.arange(1, 100, 10) ## start, stop (exclusive), step
np.linspace(1, 100, 10) ## start, stop (inclusive), no of values
## constant-filled arrays
np.zeros(10)
np.ones(10)
## random
np.random.random(10) ## uniform samples in [0, 1)
np.random.randn(10) ## samples from the standard normal distribution
Array Operations¶
Indexing¶
np.vectorize¶
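np.vectorize wraps a Python function so it can be called element-wise on arrays. It is essentially a for loop: convenient, but not faster than looping yourself.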
Stats¶
Calculus¶
## numerical calculus (for symbolic/analytic, use sympy)
## finite-difference derivative of y with respect to x
dydx = np.gradient(y, x )
## running integral as a Riemann sum; x[1]-x[0] is used as the step, so this assumes evenly spaced x
y_int = np.cumsum(y) * (x[1]-x[0])
Multi-Dimensional¶
## 2 x 3 array built from nested lists (one inner list per row)
a = np.array([
[1, 2, 3],
[4, 5, 6]
])
## rebinds a to a 3 x 3 array of standard-normal samples
a = np.random.randn(3, 3)
a.ravel() ## returns a 1d array
a[0] ## first row
a[:,0] ## first column
Mesh Grid¶
Linear Algebra¶
Matrix¶
Solve systems of equations¶
## coefficient matrix: one row per equation, one column per unknown
a = np.array([
[3, 2, 1],
[5, -5, 4],
[6, 0, 1]
])
## right-hand side: one entry per equation
b = np.array([
4,
3,
0
])
x = np.linalg.solve(a, b) ## ax = b
Eigenvalues¶
Find-Replace¶
if
## cap Rating at 100: where Rating > 100 use 100, otherwise keep the original value
prediction['Rating'] = np.where(
prediction['Rating'].to_numpy() > 100,
100,
prediction['Rating'].to_numpy()
)
if-else
if-elseif-else
## np.select uses, per element, the value paired with the FIRST condition that is True,
## so conditions are listed from most to least restrictive
conditions = [
prediction['Rating'].to_numpy() > 100,
prediction['Rating'].to_numpy() > 50,
prediction['Rating'].to_numpy() > 20
]
## values[i] is the replacement used where conditions[i] is the first match
values = [
100,
50,
20
]
## used where no condition matches
default = 0
prediction['Rating'] = np.select(
conditions,
values,
default = default
)
nested
## Nested conditions: combine element-wise tests with & (and) / | (or).
## Every comparison must be parenthesized: & binds tighter than > and ==, so
## `a > 100 & a % 2 == 0` is parsed as the chained comparison
## `a > (100 & (a % 2)) == 0`, which raises on arrays.
## np.select uses the FIRST True condition per element, so order matters.
## NOTE: every multiple of 4 is also a multiple of 2, so the `% 4` branch (104)
## is shadowed by the `% 2` branch (102); reorder if 104 should take priority.
conditions = [
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 2 == 0),
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 3 == 0),
    (prediction['Rating'].to_numpy() > 100) & (prediction['Rating'].to_numpy() % 4 == 0),
    prediction['Rating'].to_numpy() > 50,
    prediction['Rating'].to_numpy() > 20
]
## values[i] is the replacement used where conditions[i] is the first match
values = [
    102,
    103,
    104,
    50,
    20
]
## used where no condition matches
default = 0
prediction['Rating'] = np.select(
    conditions,
    values,
    default = default
)
Rounding¶
Round to Integer¶
Round to \(n\) places
Read data¶
Save¶
Cartesian¶
Indexing¶
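High space complexity: the full product is materialized in memory as an array with (product of the input sizes) rows and one column per input array.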
import numpy as np
def cartesian(arrays, out=None):
    """
    Generate a Cartesian product of input arrays.

    Parameters
    ----------
    arrays : list of array-like
        1-D arrays to form the Cartesian product of.
    out : ndarray
        Array to place the Cartesian product in. When omitted, a new array
        is allocated whose dtype is the common (promoted) dtype of all the
        inputs, so mixing e.g. ints and floats does not truncate values.
        If the inputs have no common dtype, the dtype of the first array
        is used.

    Returns
    -------
    out : ndarray
        2-D array of shape (M, len(arrays)) containing Cartesian products
        formed of input arrays. M is the product of the input sizes, so it
        is 0 as soon as any input is empty.

    Examples
    --------
    >>> cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6],
           [1, 4, 7],
           [1, 5, 6],
           [1, 5, 7],
           [2, 4, 6],
           [2, 4, 7],
           [2, 5, 6],
           [2, 5, 7],
           [3, 4, 6],
           [3, 4, 7],
           [3, 5, 6],
           [3, 5, 7]])
    """
    arrays = [np.asarray(x) for x in arrays]
    first = arrays[0]

    n = int(np.prod([x.size for x in arrays]))
    if out is None:
        try:
            dtype = np.result_type(*arrays)
        except TypeError:
            # No common dtype (e.g. numbers mixed with strings).
            dtype = first.dtype
        out = np.zeros([n, len(arrays)], dtype=dtype)

    # An empty input makes the whole product empty; returning here also
    # avoids dividing by a zero size below.
    if n == 0:
        return out

    # Each value of the first array heads a run of m consecutive rows.
    m = n // first.size
    out[:, 0] = np.repeat(first, m)
    if arrays[1:]:
        # Fill the remaining columns for the first run recursively, then
        # copy that run under every other value of the first array.
        cartesian(arrays[1:], out=out[0:m, 1:])
        for j in range(1, first.size):
            out[j*m:(j+1)*m, 1:] = out[0:m, 1:]
    return out
Pairwise Mutual Information Matrix¶
from joblib import Parallel, delayed
class PairwiseMutualInformation():
    """Pairwise mutual information between the columns of a DataFrame.

    Every pair of columns is discretized with a 2-D histogram, the joint
    entropy of each pair is computed from it, and the mutual information
    is derived as ``H(i) + H(j) - H(i, j)``.

    Parameters
    ----------
    normalized : bool
        If True, return ``2 * MI(i, j) / (H(i) + H(j))`` instead of the
        raw mutual information.
    n_bins : int or None
        Number of histogram bins per variable. If None, it is set during
        ``fit`` to ``int(sqrt(n_samples / 5))`` (at least 1).
    sample : int, float or None
        If an int, number of rows to sample before fitting; if a float,
        fraction of rows to sample. None uses all rows.
    random_state : int or None
        Passed to ``DataFrame.sample`` when ``sample`` is given.
    n_jobs : int or None
        Number of joblib workers for the histogram computation.
        None is treated as 1, which runs serially without joblib.
    """

    def __init__(self, normalized=True, n_bins=None, sample=None, random_state=None, n_jobs=None, ):
        self.n_bins = n_bins
        self.sample = sample
        self.normalized = normalized
        self.random_state = random_state
        self.n_jobs = n_jobs if n_jobs is not None else 1

    def compute_histogram2d(self, i, j, X, n_bins):
        """Return the 2-D histogram counts of columns ``i`` and ``j`` of ``X``."""
        return np.histogram2d(X[:, i], X[:, j], bins=n_bins)[0]

    def joint_entropies(self, X):
        """Return the (n_variables, n_variables) matrix of joint entropies in bits.

        Requires ``self.n_variables`` and ``self.n_bins`` to be set (done by ``fit``).
        """
        n = self.n_variables
        pairs = [(i, j) for i in range(n) for j in range(n)]

        if self.n_jobs == 1:
            # A single job is serial anyway; skip the joblib machinery.
            results = [
                self.compute_histogram2d(i, j, X, self.n_bins) for i, j in pairs
            ]
        else:
            results = Parallel(n_jobs=self.n_jobs)(
                delayed(self.compute_histogram2d)(i, j, X, self.n_bins)
                for i, j in pairs
            )

        # Results come back in the (i, j) row-major order they were submitted in.
        histograms2d = np.asarray(results, dtype=float).reshape(
            n, n, self.n_bins, self.n_bins
        )

        # The tiny offset keeps log2 finite for empty histogram cells.
        probs = histograms2d / len(X) + 1e-100
        return -(probs * np.log2(probs)).sum((2, 3))

    def get_mutual_info_matrix(self, X):
        """Return the (optionally normalized) mutual-information matrix for array ``X``."""
        j_entropies = self.joint_entropies(X)
        # The joint entropy of a variable with itself is its own entropy.
        entropies = j_entropies.diagonal()
        entropies_tile = np.tile(entropies, (self.n_variables, 1))
        sum_entropies = entropies_tile + entropies_tile.T  # H(i) + H(j)
        mi_matrix = sum_entropies - j_entropies
        if self.normalized:
            mi_matrix = mi_matrix * 2 / sum_entropies
        return mi_matrix

    def fit(self, X, y=None):
        """Compute the mutual-information matrix of DataFrame ``X``.

        ``y`` is accepted for API compatibility and ignored. Returns ``self``.
        """
        self.columns_ = X.columns

        if self.sample is not None:
            # bool is a subclass of int but is not a meaningful row count.
            is_bool = isinstance(self.sample, bool)
            if isinstance(self.sample, (int, np.integer)) and not is_bool:
                # Sample from the data passed in, not from a global DataFrame.
                X = X.sample(n=self.sample, random_state=self.random_state)
            elif isinstance(self.sample, (float, np.floating)):
                X = X.sample(frac=self.sample, random_state=self.random_state)
            # Any other type is ignored and all rows are used.

        X = X.to_numpy()
        self.n_variables = X.shape[-1]
        self.n_samples = X.shape[0]

        if self.n_bins is None:
            # Clamp to 1 so very small inputs do not request zero bins.
            self.n_bins = max(1, int((self.n_samples / 5) ** .5))

        self.mi_matrix_ = self.get_mutual_info_matrix(X)
        return self

    def transform(self, X, y=None):
        """Return the fitted matrix as a DataFrame labelled by the fitted columns.

        ``X`` and ``y`` are ignored; the result depends only on the last ``fit``.
        """
        # Local import so the method does not rely on a module-level `pd`.
        import pandas as pd

        return pd.DataFrame(self.mi_matrix_, index=self.columns_, columns=self.columns_)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the mutual-information matrix as a DataFrame."""
        return self.fit(X, y).transform(X, y)
## normalized pairwise mutual information computed on a 10% sample of df
matrix_similarity = PairwiseMutualInformation(
    normalized=True,
    n_jobs=-1,
    sample=0.10,
    random_state=0,
).fit_transform(df)