# Running minimum of "Reading" over time, computed separately for each
# "Temperature" group. Two alternative recipes follow; use one or the other.

# Variant 1: overwrite "Reading" in place with its running minimum.
# The result keeps the original row labels, so the assignment is aligned
# back onto df by index (assumes the index labels are unique -- TODO confirm).
df["Reading"] = (
    df.sort_values("Time_Point", ascending=True)  # this is the time column, so always ascending
    .groupby("Temperature")["Reading"]
    .cummin()
)

# or

# Variant 2: keep "Reading" untouched, store the running minimum in a helper
# column, then keep only the rows whose reading does not exceed it, i.e. the
# rows that set (or tie) a new minimum within their Temperature group.
df_train["Reading_Time_Point_Cummin"] = (
    df_train.sort_values("Time_Point", ascending=True)  # this is the time column, so always ascending
    .groupby("Temperature")["Reading"]
    .cummin()
)
df_train = df_train.query("Reading <= Reading_Time_Point_Cummin")
# Running maximum of "Reading" over time, computed separately for each
# "Temperature" group. Two alternative recipes follow; use one or the other.

# Variant 1: overwrite "Reading" in place with its running maximum.
# The result keeps the original row labels, so the assignment is aligned
# back onto df by index (assumes the index labels are unique -- TODO confirm).
df["Reading"] = (
    df.sort_values("Time_Point", ascending=True)  # this is the time column, so always ascending
    .groupby("Temperature")["Reading"]
    .cummax()
)

# or

# Variant 2: keep "Reading" untouched, store the running maximum in a helper
# column, then keep only the rows whose reading is not below it, i.e. the
# rows that set (or tie) a new maximum within their Temperature group.
df_train["Reading_Time_Point_Cummax"] = (
    df_train.sort_values("Time_Point", ascending=True)  # this is the time column, so always ascending
    .groupby("Temperature")["Reading"]
    .cummax()
)
# Compare against the Cummax column created just above (not the Cummin one).
df_train = df_train.query("Reading >= Reading_Time_Point_Cummax")
# Random row sampling with DataFrame.sample.

# A fraction of the dataset must be passed as frac=; n only accepts integers,
# so df.sample(0.10) would raise a ValueError.
df.sample(frac=0.10)

# An integer n is the number of rows to draw.
df.sample(n=1_000)

# Sampling with probability weights taken from a column of the frame.
df.sample(n=1_000, weights="Weight_Column")