Python¶
Import Package¶
All the above take the same duration; no performance difference
Garbage Collection¶
Manually collecting will be faster than automatic
import gc
# Read the three collection thresholds currently in effect.
# NOTE: g0, g1, g2 are not used again below; the thresholds are later set to
# fixed values rather than restored from these.
g0, g1, g2 = gc.get_threshold() # default: 700, 10, 10
# gc.set_threshold(10_000, 10, 10)
# Collect generation 0 by hand before automatic collection is switched off.
gc.collect(generation=0)
# Threshold 0 turns automatic collection off; gc.disable() states it explicitly.
gc.set_threshold(0)
gc.disable()
# Move every object tracked so far into the permanent generation so that
# later collections ignore it.
gc.freeze()
heavy_code() # like ML, database
# Return the frozen objects to the oldest generation, then re-enable automatic
# collection with a raised generation-0 threshold.
gc.unfreeze()
gc.set_threshold(10_000, 10, 10)
gc.enable()
gc.collect(generation=0)
# exit
# don't cleanup on exit
# NOTE(review): `atexit` and `os` are not imported in this snippet; add
# `import atexit` and `import os` before using the line below.
atexit.register(os._exit, 0) # only for Python < 3.6
IDK¶
A collection before a POSIX fork() call may free pages for future allocation, which can cause copy-on-write too.
Hence
- Parent process
- disable garbage collector
- freeze before fork
- Child process
- Enable garbage collector
Machine Learning¶
# Switch automatic garbage collection off for the whole training run.
gc.set_threshold(0)
gc.disable()
# NOTE(review): indentation was lost in this snippet, so the loop nesting is
# ambiguous. Presumably the batch loop is nested in the epoch loop and the
# manual collections run at chosen points inside/after it — confirm placement.
for epoch in range(n_epochs):
for batch in batch_data_loader:
# train
# eval
# Manual collection of generation 0 only.
gc.collect(0)
# Manual full collection (all generations).
gc.collect()
# exit
# NOTE(review): `atexit` and `os` are not imported in this snippet.
atexit.register(os._exit, 0) # only for Python < 3.6
IDK¶
gc.disable()
will sometimes get overridden by another library calling gc.enable()
Number Formatting¶
number = 333.43
"{:02d}".format(1) ## leading zeroes
"{:2f}".format(number) ## floating point rounding
f"{x:z}" ## rounds negative 0
f"{x:z.1f}"
Hex to RGBA¶
def hex_to_rgba(h, alpha):
    """Convert a hex colour string to an ``rgba(...)`` string.

    Args:
        h: Colour in hex notation, with or without leading ``#``. Accepts the
            six-digit form (``"#ff8800"``) and the three-digit shorthand
            (``"#f80"``). Only the first six digits of longer input are used.
        alpha: Transparency value; inserted unchanged as the fourth component.

    Returns:
        A string such as ``"rgba(255, 136, 0, 0.5)"``.

    Raises:
        ValueError: If a colour channel is missing or is not valid hex.
    """
    digits = h.lstrip("#")
    if len(digits) == 3:
        ## expand shorthand by doubling every digit: "f80" -> "ff8800"
        digits = "".join(digit * 2 for digit in digits)
    channels = [int(digits[i:i + 2], 16) for i in (0, 2, 4)]
    return "rgba" + str(tuple(channels + [alpha]))
Custom Rounding¶
def round_f(x, places, percentage=False):
if percentage:
x *= 100
string = f"{x:z.{places}f}"
if places > 0:
string = string.rstrip('0').rstrip('.')
if percentage:
string += "%"
return string
def round_s(x, significant_decimals, max_digits=None, percentage=False):
    """Format ``x`` keeping ``significant_decimals`` digits after its leading decimal zeros.

    Args:
        x: Number to format.
        significant_decimals: Decimals to keep, counted from the first
            non-zero digit after the decimal point.
        max_digits: Upper bound for the number of decimals. Defaults to
            ``min(significant_decimals * 2, 4)``.
        percentage: When true, ``x`` is multiplied by 100 before formatting.

    Returns:
        The string produced by ``round_f`` for the computed number of decimals.
    """
    from decimal import Decimal

    if percentage:
        x *= 100
    if max_digits is None:
        max_digits = min(significant_decimals * 2, 4)

    ## str(x) has no "." for ints and uses exponent notation for very small or
    ## large floats (e.g. "1e-07"); expand to plain fixed-point digits first
    fixed_point = format(Decimal(str(x)), "f")
    _, _, decimal_digits = fixed_point.partition(".")

    pos_first_non_zero = len(decimal_digits) - len(decimal_digits.lstrip("0"))
    pos = pos_first_non_zero + significant_decimals
    ## NOTE(review): `percentage` is not forwarded, so no "%" suffix is added
    ## here, unlike round_f — confirm whether that is intended.
    return round_f(x, min(pos, max_digits))
Text¶
## split a newline-separated string into a list
names = names.split("\n")
## NOTE(review): this looks like an alternative to the line above, not a
## follow-up step — after the first split `names` is a list, which has no .split()
names = names.split(",")
## remove empty strings from string list
names = list(filter(None, names))
class color:
    """ANSI escape sequences for styling text printed to a terminal."""

    ## foreground colours
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    DARKCYAN = '\033[36m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'

    ## text attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    ## resets every style set before
    END = '\033[0m'


## wrap the text in a style code and reset afterwards
print(f"{color.BOLD}Hello World !{color.END}")
IDK¶
Name of script¶
Useful for pages in Streamlit
Name of calling function¶
Useful for checking if this is a program or a library
Input Hidden Text/Password¶
Date-Time¶
Refer to Python DateTime Formats
Find Home Directory¶
✅ This is cross-platform
Delete¶
Move file to Recycle Bin¶
import glob
import os

from send2trash import send2trash

## a folder or a single file can be sent to the Recycle Bin
send2trash("test_folder")
send2trash("test.csv")

## send every .mov file inside `directory` to the Recycle Bin
for file in glob.glob(os.path.join(directory, '*.mov')):
    ## glob already returns the path including `directory`; joining it with
    ## `directory` a second time would break relative directories
    send2trash(file)
    print("Deleted", file)
❌ Permanently Delete File¶
import glob
import os

## a single file
os.remove(file)

## every .mov file inside `directory`
for file in glob.glob(os.path.join(directory, '*.mov')):
    ## glob already returns the path including `directory`; joining it with
    ## `directory` a second time would break relative directories
    os.remove(file)
    print("Deleted", file, "Permanently")
❌ Permanently Delete Folder¶
Copy¶
import shutil

## each variant copies a little more than the previous one
shutil.copyfile(src, dest)  ## contents of file
shutil.copy(src, dest)  ## copyfile() + permission mode
shutil.copy2(src, dest)  ## copy() + copies metadata
List all files in a directory¶
import os

## list and join against the same directory; os.listdir returns bare names
directory = "./data"
for file_name in os.listdir(directory):
    file = os.path.join(directory, file_name)
    print(file)
Create a folder¶
Get only files of a particular type using glob¶
import glob
import os

## glob returns paths that already start with `directory`, so no further join
for file in glob.glob(os.path.join(directory, '*.mp4')):
    print(file)
Get files of Multiple Types using glob¶
# better
from pathlib import Path
## lazily yields the resolved path of every entry below the current
## directory whose suffix is one of the listed ones
all_note_paths = (
    path.resolve()
    for path in Path("./").glob("**/*")
    if path.suffix in (".md", ".css", ".js", ".html")
)
def list_files(images_dir):
    """Return the paths of all jpg, jpeg and png files below ``images_dir``.

    The search is recursive and includes ``images_dir`` itself.

    Args:
        images_dir: Directory to search.

    Returns:
        A list of paths, grouped by extension in the order jpg, jpeg, png.
    """
    import glob
    import os

    image_paths = []
    for extension in ("jpg", "jpeg", "png"):
        ## "**" with recursive=True matches any depth of sub-directories
        pattern = os.path.join(images_dir, "**", f"*.{extension}")
        image_paths.extend(glob.glob(pattern, recursive=True))
    return image_paths
Get filename with extension¶
Get folder name¶
## path of the containing folder: everything before the last separator
filename = "folder/file.mp4"
os.path.split(filename)[0]

## name of the innermost containing folder only
filename = "folder/folder/file.mp4"
os.path.split(os.path.split(filename)[0])[1]
Get extension only¶
Get filename only¶
import os
def get_filename(file):
    """Return the name of ``file`` without its directory and final extension.

    Args:
        file: Path or bare file name.

    Returns:
        The base name with the last extension removed, e.g. ``"file"`` for
        ``"folder/file.mp4"``.
    """
    base_name = os.path.basename(file)
    stem, _extension = os.path.splitext(base_name)
    ## for a derived name that keeps the extension: stem + "_Copy" + _extension
    return stem
Files in directory and sub-directory¶
from os import walk
files = []

## specific directory: only the first step of the walk is used
first_step = next(walk("./data"), None)
if first_step is not None:
    files.extend(first_step[2])

## directory and subdirectories (added to the same list)
for (dirpath, dirnames, filenames) in walk("."):
    files += filenames

files
Search Substring in String¶
IDK¶
Class¶
Inspect¶
Inspect class
def func_of_class(class_name):
    """Return the names of all callable attributes of ``class_name``.

    Names starting with a double underscore are left out. The result follows
    the order of ``dir(class_name)``.
    """
    method_names = []
    for attr_name in dir(class_name):
        attribute = getattr(class_name, attr_name)
        if callable(attribute) and not attr_name.startswith("__"):
            method_names.append(attr_name)
    return method_names
## call every method reported by func_of_class on an instance, passing `arg`
obj = my_class()
for method_name in func_of_class(type(obj)):
    bound_method = getattr(obj, method_name)
    bound_method(arg)
Create virtual environment¶
Switch to this virtual environment
If you get an error when a PowerShell script runs, run this code in PowerShell (admin)
Traverse list with index and value¶
CLI¶
Argparse¶
import argparse
## defaults, used when the matching command line flag is omitted
TIME_THRESHOLD = 10
HASH_SIZE = 4

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Group similar images")
    ## without `default=`, an omitted flag is parsed as None and would
    ## overwrite the constants above
    parser.add_argument(
        "--tt",
        type=int,
        default=TIME_THRESHOLD,
        help=f"Time Threshold (seconds), default = {TIME_THRESHOLD}",
    )
    parser.add_argument(
        "--hs",
        type=int,
        default=HASH_SIZE,
        help=f"Hash Size, default = {HASH_SIZE}",
    )
    args = parser.parse_args()

    TIME_THRESHOLD = args.tt
    HASH_SIZE = args.hs
Adding to path using setuptools¶
https://python-packaging.readthedocs.io/en/latest/command-line-scripts.html
Import classes/functions from another python file¶
Get classes of module/Python file¶
def get_classes_of_module(module):
    """Return the classes that are defined in ``module`` itself.

    Classes that the module merely imports from elsewhere are left out.

    Args:
        module: Dotted module name (e.g. ``"utils.models"``) or an already
            imported module object.

    Returns:
        A list of class objects, ordered by class name.
    """
    import importlib
    import inspect

    if isinstance(module, str):
        module = importlib.import_module(module)

    return [
        cls
        for _name, cls in inspect.getmembers(module, inspect.isclass)
        ## __module__ names the defining module; imported classes differ here
        if cls.__module__ == module.__name__
    ]
Caching¶
from functools import cache
import time
@cache
def function():
    """Return 1 after a slow step; the result is stored by ``@cache``."""
    ## slow step: runs on the first call only, later calls hit the cache
    time.sleep(10)
    return 1
Get variable name¶
import inspect
def var(var):
    """Return the name that refers to ``var`` in the caller's scope.

    The caller's variables are searched by identity, so the first name bound
    to the very same object is returned.

    Args:
        var: Object to look up.

    Returns:
        The variable name, or ``None`` when no name in the caller's scope
        refers to the object.
    """
    current_frame = inspect.currentframe()
    caller_frame = current_frame.f_back if current_frame is not None else None
    if caller_frame is None:
        return None
    try:
        for name, value in caller_frame.f_locals.items():
            if value is var:
                return name
        return None
    finally:
        ## frames keep their locals alive; release them to avoid a reference cycle
        del current_frame, caller_frame


## the example variable must not be called `var`, which would replace the function
greeting = "Hello"
var_name = var(greeting)  ## -> "greeting"
Memory Usage¶
def get_memory_usage():
    """Return the ``rss`` memory figure of the current process, divided by 1024**2.

    NOTE(review): ``Process`` is not imported in this snippet; presumably it
    is ``psutil.Process`` — confirm.
    """
    bytes_per_mb = 1024**2
    current_process = Process(os.getpid())
    resident_bytes = current_process.memory_info().rss
    return resident_bytes / bytes_per_mb
Lazy Imports¶
class LazyImport:
    """Stand-in for a module that is imported on first attribute access."""

    def __init__(self, module_name):
        """Remember ``module_name``; nothing is imported yet."""
        self.module_name = module_name
        self._module = None

    def __getattr__(self, attr):
        """Import the module if needed and return its attribute ``attr``."""
        ## __getattr__ runs only when normal lookup fails. If the proxy's own
        ## fields are missing (instance created without __init__), reading
        ## them below would call __getattr__ again and recurse without end.
        if attr in ("module_name", "_module"):
            raise AttributeError(attr)
        if self._module is None:
            import importlib

            self._module = importlib.import_module(self.module_name)
        return getattr(self._module, attr)
## creating the proxy only stores the module name; numpy is not imported yet
np = LazyImport("numpy")
## the first attribute access (`np.array`) triggers the actual import
np.array([0, 1, 2])
Save File¶
def save_file(file, file_name, location):
    """Write the buffer of ``file`` to ``file_name`` inside ``location``.

    Args:
        file: Object offering ``getbuffer()``.
        file_name: Name of the file to create.
        location: Directory that receives the file.
    """
    target_path = os.path.join(location, file_name)
    with open(target_path, "wb") as destination:
        destination.write(file.getbuffer())
Intersection of 2 Curves¶
import numpy as np
import matplotlib.pyplot as plt
# shared x-axis: the integers 0..999
x = np.arange(0, 1000)
# first curve: the straight line f(x) = x
f = np.arange(0, 1000)
# second curve: a sine wave scaled by 1000
# (presumably sampled at the same number of points as x — confirm)
g = np.sin(np.arange(0, 10, 0.01) * 2) * 1000
plt.plot(x, f, '-')
plt.plot(x, g, '-')
# sign(f - g) changes where the curves cross; np.diff is non-zero exactly at
# those changes and np.argwhere returns their indices
idx = np.argwhere(np.diff(np.sign(f - g))).flatten()
# mark the samples at those indices on the first curve
plt.plot(x[idx], f[idx], 'ro')
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import LineString
# load the table; the columns 'Supply', 'Demand' and 'Price(dollar)' are read below
df = pd.read_excel('D:/Coding Practice/data/data_supply-demand.xlsx')
supply = df['Supply']
demand = df['Demand']
price = df['Price(dollar)']
# plot both curves against price
plt.plot(supply,price)
plt.plot(demand,price)
# build one line per curve from its (quantity, price) pairs
line_1 = LineString(np.column_stack((supply, price)))
line_2 = LineString(np.column_stack((demand, price)))
intersection = line_1.intersection(line_2)
# NOTE(review): `.xy` presumably requires the intersection to be a single
# point — confirm behaviour when the lines cross more than once or not at all
plt.plot(*intersection.xy, 'ro')
plt.show()
x, y = intersection.xy
print(x, y)
Progress Bar¶
for epoch in range(NUM_EPOCHS):
    ## wrapping the loader in tqdm gives one progress bar per epoch
    loop = tqdm(loader)
    for idx, (x, y) in enumerate(loop):
        scores = model(x)

        # here we would compute loss, backward, optimizer step etc.
        # you know how it goes, but now you have a nice progress bar
        # with tqdm

        # then at the bottom if you want additional info shown, you can
        # add it here, for loss and accuracy you would obviously compute
        # but now we just set them to random values
        loop.set_description(f"Epoch [{epoch}/{NUM_EPOCHS}]")
        ## .item() converts the one-element tensor to a plain Python number
        loop.set_postfix(
            loss=torch.rand(1).item(),
            acc=torch.rand(1).item(),
        )