SIP_GoldBees_Backend/strategy_code/sma_momemtum_sip_model.py
2026-02-01 13:57:30 +00:00

591 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
import numpy as np
import yfinance as yf
# =========================================================
# CONFIG
# =========================================================
# Download range handed to yf.download in load_price. It begins well before
# SIP_START_DATE; presumably so the long moving average is already populated
# at the first SIP order -- TODO confirm.
START_DATE = "2010-01-01"
END_DATE = "2025-12-31"
# Orders are only scheduled on price dates inside [SIP_START_DATE, SIP_END_DATE].
SIP_START_DATE = "2018-04-01"
SIP_END_DATE = "2025-10-31" # set None for "till last data"
# Target contribution per month (printed with a rupee sign in the summary).
MONTHLY_SIP = 100
# Order frequency parameter (N trading days)
ORDER_EVERY_N = 5 # 5 => every 5 trading days, 30 => every 30 trading days
# Whether to keep MONTHLY_SIP constant (recommended)
# True => scales cash per order so approx monthly investment stays ~MONTHLY_SIP
# False => invests MONTHLY_SIP every order (be careful: for N=5 this is much larger than monthly SIP)
KEEP_MONTHLY_BUDGET_CONSTANT = True
# Ticker symbols passed to yf.download for the two assets.
NIFTY = "NIFTYBEES.NS"
GOLD = "GOLDBEES.NS"
# Rebind the window bounds from strings to Timestamps so they can be compared
# against the price index; a falsy SIP_END_DATE stays None (open-ended window).
SIP_START_DATE = pd.to_datetime(SIP_START_DATE)
SIP_END_DATE = pd.to_datetime(SIP_END_DATE) if SIP_END_DATE else None
# =========================================================
# CENTRAL SIP DATE WINDOW
# =========================================================
def in_sip_window(date):
    """Tell whether ``date`` lies inside the configured SIP window.

    The window is closed on both ends: it opens at ``SIP_START_DATE`` and,
    when ``SIP_END_DATE`` is set, closes at ``SIP_END_DATE``. A falsy
    ``SIP_END_DATE`` (``None``) leaves the window open-ended.

    Args:
        date: Timestamp-like value comparable with the module-level bounds.

    Returns:
        ``True`` if ``date`` is within the window, otherwise ``False``.
    """
    too_early = date < SIP_START_DATE
    # ``and`` short-circuits, so the upper bound is only compared when one is configured.
    return not (too_early or (SIP_END_DATE and date > SIP_END_DATE))
# =========================================================
# VALUATION (SMA MEAN REVERSION)
# =========================================================
# Length of the moving average, expressed in months (converted to days below).
SMA_MONTHS = 36
# run_sipxar computes tilt = clip(-rel_dev * TILT_MULT, -MAX_TILT, +MAX_TILT).
TILT_MULT = 1.5
MAX_TILT = 0.25
# Equity weight is BASE_EQUITY * (1 + tilt), then clamped to [MIN_EQUITY, MAX_EQUITY];
# the remainder of each contribution goes to gold.
BASE_EQUITY = 0.60
MIN_EQUITY = 0.20
MAX_EQUITY = 0.90
# For daily data
# Approximate number of trading sessions per month; also used to scale the
# per-order cash amount and the rolling-window lengths elsewhere in this file.
TRADING_DAYS_PER_MONTH = 21
SMA_DAYS = SMA_MONTHS * TRADING_DAYS_PER_MONTH # ~36 months on daily series
# =========================================================
# DATA LOAD
# =========================================================
def load_price(ticker):
    """Download the close-price series for ``ticker``.

    Prices are requested from ``START_DATE`` to ``END_DATE`` with
    ``auto_adjust=True`` and the progress bar disabled.

    Args:
        ticker: symbol string understood by ``yf.download``.

    Returns:
        The ``"Close"`` column of the downloaded frame.
    """
    request = {
        "start": START_DATE,
        "end": END_DATE,
        "auto_adjust": True,
        "progress": False,
    }
    frame = yf.download(ticker, **request)

    # The download may come back with two-level columns; keep only the first
    # level so the "Close" lookup below works either way.
    columns = frame.columns
    if isinstance(columns, pd.MultiIndex):
        frame.columns = columns.get_level_values(0)

    return frame["Close"]
# Put both close series on one shared date index. dropna() discards every
# date on which either ticker has no price, so both columns are always populated.
# NOTE: this runs at import time and triggers two downloads via load_price.
prices = pd.DataFrame({
"NIFTY": load_price(NIFTY),
"GOLD": load_price(GOLD)
}).dropna()
# Use business-day frequency (daily for trading)
# resample("B") inserts rows for business days with no data; the trailing
# dropna() removes them again, so only dates with actual prices remain.
prices = prices.resample("B").last().dropna()
# =========================================================
# ORDER SCHEDULE: EVERY N TRADING DAYS
# =========================================================
def get_order_dates(index: pd.DatetimeIndex, n: int) -> pd.DatetimeIndex:
    """Pick every ``n``-th date of ``index`` that falls inside the SIP window.

    Args:
        index: dates to choose from (the price index in this script).
        n: step between consecutive orders, counted in entries of ``index``.

    Returns:
        The dates from ``SIP_START_DATE`` through ``SIP_END_DATE`` (no upper
        bound when ``SIP_END_DATE`` is falsy), thinned to every ``n``-th
        entry starting with the first date in the window.
    """
    inside = index >= SIP_START_DATE
    if SIP_END_DATE:
        inside = inside & (index <= SIP_END_DATE)
    return index[inside][::n]
# Dates on which every strategy in this file places an order.
ORDER_DATES = get_order_dates(prices.index, ORDER_EVERY_N)

# Cash per order
if not KEEP_MONTHLY_BUDGET_CONSTANT:
    # Invest the full monthly amount on every single order.
    SIP_AMOUNT_PER_ORDER = MONTHLY_SIP
else:
    # Approx monthly orders = 21 / N; spread the monthly budget across them.
    orders_per_month = TRADING_DAYS_PER_MONTH / ORDER_EVERY_N
    SIP_AMOUNT_PER_ORDER = MONTHLY_SIP / orders_per_month
# =========================================================
# (OPTIONAL) CHECK PRICE ON A GIVEN DATE (UNCHANGED)
# =========================================================
def check_price_on_date(ticker, date_str):
    """Print the unadjusted OHLCV bar of ``ticker`` for a single date.

    Downloads a window of five calendar days on either side of the date
    (``auto_adjust=False``) and prints the row for exactly that date. If the
    download is empty, or the date is not in the returned index, a message
    is printed instead.

    Args:
        ticker: symbol string understood by ``yf.download``.
        date_str: date in any format accepted by ``pd.to_datetime``.

    Returns:
        None. All output goes to stdout.
    """
    date = pd.to_datetime(date_str)
    df = yf.download(
        ticker,
        start=date - pd.Timedelta(days=5),
        end=date + pd.Timedelta(days=5),
        auto_adjust=False,
        progress=False
    )
    if df.empty:
        print(f"No data returned for {ticker}")
        return

    # Flatten two-level columns so the plain field names below resolve.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    if date not in df.index:
        print(f"{ticker} | {date.date()} → No trading data (holiday / no volume)")
        return

    row = df.loc[date]
    # The header previously printed ticker and date fused together; use the
    # same "ticker | date" layout as the no-data message above.
    print(f"\n{ticker} | {date.date()}")
    print(f"Open : ₹{row['Open']:.2f}")
    print(f"High : ₹{row['High']:.2f}")
    print(f"Low : ₹{row['Low']:.2f}")
    print(f"Close: ₹{row['Close']:.2f}")
    print(f"Volume: {int(row['Volume'])}")
# Diagnostic spot check; runs at import time and performs its own download.
check_price_on_date("NIFTYBEES.NS", "2025-11-10")
# =========================================================
# SMA DEVIATION (DAILY)
# =========================================================
# Rolling means over SMA_DAYS rows; the first SMA_DAYS - 1 entries are NaN.
sma_nifty = prices["NIFTY"].rolling(SMA_DAYS).mean()
sma_gold = prices["GOLD"].rolling(SMA_DAYS).mean()
# Fractional distance of each price from its own moving average.
dev_nifty = (prices["NIFTY"] / sma_nifty) - 1
dev_gold = (prices["GOLD"] / sma_gold) - 1
# Positive when NIFTY sits further above its average than GOLD does.
# run_sipxar turns this into an equity tilt of the opposite sign.
rel_dev = dev_nifty - dev_gold
# =========================================================
# SIPXAR ENGINE (FLOW ONLY, NO REBALANCE) — EXECUTES ON ORDER_DATES
# =========================================================
def run_sipxar(prices, rel_dev, order_dates, sip_amount_per_order):
    """Simulate the tilted SIP: new cash only, existing holdings are never rebalanced.

    On every order date the contribution is split between NIFTY and GOLD.
    The equity share is ``BASE_EQUITY * (1 + tilt)`` clamped to
    ``[MIN_EQUITY, MAX_EQUITY]``, where
    ``tilt = clip(-rel_dev * TILT_MULT, -MAX_TILT, MAX_TILT)`` and a missing
    ``rel_dev`` means no tilt.

    Args:
        prices: frame with ``"NIFTY"`` and ``"GOLD"`` columns, indexed by date.
        rel_dev: series of relative deviations, indexed like ``prices``.
        order_dates: dates on which a contribution is invested.
        sip_amount_per_order: cash invested on each order date.

    Returns:
        DataFrame indexed by ``Date`` with one row per order: weights, units
        held, position values, cumulative invested cash and PnL figures
        (all rounded for presentation).
    """
    contribution = float(sip_amount_per_order)
    units_nifty = 0.0
    units_gold = 0.0
    invested_so_far = 0.0
    last_value = None
    ledger_rows = []

    for order_date in order_dates:
        px_nifty = prices.loc[order_date, "NIFTY"]
        px_gold = prices.loc[order_date, "GOLD"]
        invested_so_far += contribution

        deviation = rel_dev.loc[order_date]
        has_signal = not pd.isna(deviation)
        # Lean away from whichever asset is stretched further above its average.
        tilt = np.clip(-deviation * TILT_MULT, -MAX_TILT, MAX_TILT) if has_signal else 0.0

        equity_w = max(BASE_EQUITY * (1 + tilt), MIN_EQUITY)
        equity_w = min(equity_w, MAX_EQUITY)
        gold_w = 1 - equity_w

        units_nifty += (contribution * equity_w) / px_nifty
        units_gold += (contribution * gold_w) / px_gold

        value_nifty = units_nifty * px_nifty
        value_gold = units_gold * px_gold
        total_value = value_nifty + value_gold

        # Change in value since the previous order, net of the cash just added.
        period_pnl = 0.0 if last_value is None else total_value - last_value - contribution
        last_value = total_value

        ledger_rows.append({
            "Date": order_date,
            "Cash_Added": round(contribution, 4),
            "Total_Invested": round(invested_so_far, 4),
            "Equity_Weight": round(equity_w, 3),
            "Gold_Weight": round(gold_w, 3),
            "Rel_Deviation": round(float(deviation) if has_signal else np.nan, 4),
            "NIFTY_Units": round(units_nifty, 6),
            "GOLD_Units": round(units_gold, 6),
            "NIFTY_Value": round(value_nifty, 4),
            "GOLD_Value": round(value_gold, 4),
            "Portfolio_Value": round(total_value, 4),
            "Period_PnL": round(period_pnl, 4),
            "Unrealized_PnL": round(total_value - invested_so_far, 4),
        })

    return pd.DataFrame(ledger_rows).set_index("Date")
# =========================================================
# RUN SIPXAR
# =========================================================
# Per-order ledger of the tilted strategy; every later metric reads from it.
sipxar_ledger = run_sipxar(
prices=prices,
rel_dev=rel_dev,
order_dates=ORDER_DATES,
sip_amount_per_order=SIP_AMOUNT_PER_ORDER
)
# First and last order dates; used below to clip the benchmark series to the
# same span and in the summary printout.
start_dt = sipxar_ledger.index.min()
end_dt = sipxar_ledger.index.max()
# =========================================================
# XIRR
# =========================================================
def xirr(cashflows):
    """Annualised money-weighted return (XIRR) of a list of dated cash flows.

    The rate is found by bisection on the net present value over the fixed
    bracket ``[-0.99, 5.0]`` (-99% to +500% per year), using ACT/365.25 year
    fractions measured from the first cash flow's date.

    Args:
        cashflows: sequence of ``(date, amount)`` pairs. Negative amounts are
            money paid in, positive amounts are money received (typically
            the terminal portfolio value). Dates must be convertible to
            ``datetime64[D]``.

    Returns:
        The annualised rate as a float, or ``nan`` when there are no cash
        flows or when the NPV does not change sign across the bracket. In
        that case no root is bracketed and the bisection would otherwise
        drift to a bound and report about -0.99 or 5.0 as if it were a rate.
    """
    cashflows = list(cashflows)
    if not cashflows:
        return np.nan

    dates = np.array([cf[0] for cf in cashflows], dtype="datetime64[D]")
    amounts = np.array([cf[1] for cf in cashflows], dtype=float)
    # Year offsets do not depend on the trial rate, so compute them once.
    years = (dates - dates[0]).astype(int) / 365.25

    def npv(rate):
        return np.sum(amounts / ((1 + rate) ** years))

    tol = 1e-6
    low, high = -0.99, 5.0
    npv_low, npv_high = npv(low), npv(high)
    if not (np.isfinite(npv_low) and np.isfinite(npv_high)):
        return np.nan
    if abs(npv_low) < tol:
        return low
    if abs(npv_high) < tol:
        return high
    if np.sign(npv_low) == np.sign(npv_high):
        # No sign change: the solution (if any) lies outside the bracket.
        return np.nan

    # Investment-style flows give NPV > 0 at the low end; remember which side
    # is positive so the search also works for the mirrored case.
    low_is_positive = npv_low > 0
    for _ in range(200):
        mid = (low + high) / 2
        val = npv(mid)
        if abs(val) < tol:
            return mid
        if (val > 0) == low_is_positive:
            low = mid
        else:
            high = mid
    return mid
# Every contribution is an outflow on its order date ...
cashflows_sipxar = [
    (order_date, -contribution)
    for order_date, contribution in sipxar_ledger["Cash_Added"].items()
]
# ... and the closing portfolio value is treated as a single inflow on the last order date.
final_date, final_value = sipxar_ledger.index[-1], sipxar_ledger["Portfolio_Value"].iloc[-1]
cashflows_sipxar.append((final_date, final_value))
sipxar_xirr = xirr(cashflows_sipxar)
# =========================================================
# COMPARISONS: NIFTY-ONLY + STATIC 60/40 — EXECUTES ON ORDER_DATES
# =========================================================
def run_nifty_sip(prices, order_dates, sip_amount):
    """Benchmark: invest the whole contribution in NIFTY on every order date.

    Args:
        prices: frame with a ``"NIFTY"`` column, indexed by date.
        order_dates: dates on which ``sip_amount`` is invested.
        sip_amount: cash invested per order.

    Returns:
        DataFrame indexed by ``Date`` with one ``Value`` column holding the
        position's market value right after each purchase.
    """
    held_units = 0.0
    stamps = []
    values = []
    for order_date in order_dates:
        px = prices.loc[order_date, "NIFTY"]
        held_units += sip_amount / px
        stamps.append(order_date)
        values.append(held_units * px)
    curve = pd.DataFrame({"Date": stamps, "Value": values})
    return curve.set_index("Date")
def run_static_sip(prices, order_dates, sip_amount, eq_w=0.6):
    """Benchmark: split every contribution at a fixed equity/gold ratio.

    Args:
        prices: frame with ``"NIFTY"`` and ``"GOLD"`` columns, indexed by date.
        order_dates: dates on which ``sip_amount`` is invested.
        sip_amount: cash invested per order.
        eq_w: fraction of each contribution that buys NIFTY; the rest buys GOLD.

    Returns:
        DataFrame indexed by ``Date`` with one ``Value`` column holding the
        combined market value right after each purchase.
    """
    equity_units = 0.0
    gold_units = 0.0
    stamps = []
    values = []
    for order_date in order_dates:
        px_equity = prices.loc[order_date, "NIFTY"]
        px_gold = prices.loc[order_date, "GOLD"]
        equity_units += (sip_amount * eq_w) / px_equity
        gold_units += (sip_amount * (1 - eq_w)) / px_gold
        stamps.append(order_date)
        values.append(equity_units * px_equity + gold_units * px_gold)
    curve = pd.DataFrame({"Date": stamps, "Value": values})
    return curve.set_index("Date")
def build_sip_ledger(value_df, sip_amount):
    """Expand a value curve into a contribution ledger.

    Assumes one contribution of ``sip_amount`` per row of ``value_df``.

    Args:
        value_df: frame indexed by date whose first column is the portfolio
            value after each order.
        sip_amount: cash added on every row.

    Returns:
        DataFrame indexed by ``Date`` with ``Cash_Added``, ``Total_Invested``,
        ``Portfolio_Value`` and ``Period_PnL`` (value change since the
        previous row net of the new cash; 0.0 on the first row).
    """
    records = []
    invested = 0.0
    last_seen = None
    # Plain tuples: position 0 is the index label, position 1 the first column.
    for entry in value_df.itertuples(index=True, name=None):
        stamp, current = entry[0], float(entry[1])
        invested += sip_amount
        pnl = 0.0 if last_seen is None else current - last_seen - sip_amount
        last_seen = current
        records.append({
            "Date": stamp,
            "Cash_Added": round(sip_amount, 4),
            "Total_Invested": round(invested, 4),
            "Portfolio_Value": round(current, 4),
            "Period_PnL": round(pnl, 4),
        })
    return pd.DataFrame(records).set_index("Date")
# Value curves of the two benchmarks on the same order schedule and per-order cash.
nifty_sip = run_nifty_sip(prices, ORDER_DATES, SIP_AMOUNT_PER_ORDER)
static_sip = run_static_sip(prices, ORDER_DATES, SIP_AMOUNT_PER_ORDER, 0.6)
# Clip to the span covered by the strategy ledger so all three are comparable.
nifty_sip = nifty_sip.loc[start_dt:end_dt]
static_sip = static_sip.loc[start_dt:end_dt]
# Ledgers (cash added, total invested, value, period PnL) used by the metrics below.
nifty_ledger = build_sip_ledger(nifty_sip, SIP_AMOUNT_PER_ORDER)
static_ledger = build_sip_ledger(static_sip, SIP_AMOUNT_PER_ORDER)
# XIRR inputs: one outflow per order date plus the final value as a single
# inflow on the last order date.
cashflows_nifty = [(d, -SIP_AMOUNT_PER_ORDER) for d in nifty_sip.index]
cashflows_nifty.append((nifty_sip.index[-1], float(nifty_sip["Value"].iloc[-1])))
nifty_xirr = xirr(cashflows_nifty)
cashflows_static = [(d, -SIP_AMOUNT_PER_ORDER) for d in static_sip.index]
cashflows_static.append((static_sip.index[-1], float(static_sip["Value"].iloc[-1])))
static_xirr = xirr(cashflows_static)
# =========================================================
# PRINTS
# =========================================================
# Echo the run configuration so exported numbers can be traced to their settings.
print("\n=== CONFIG SUMMARY ===")
print(f"Order every N trading days: {ORDER_EVERY_N}")
print(f"Keep monthly budget constant: {KEEP_MONTHLY_BUDGET_CONSTANT}")
print(f"MONTHLY_SIP (target): ₹{MONTHLY_SIP}")
print(f"SIP_AMOUNT_PER_ORDER: ₹{SIP_AMOUNT_PER_ORDER:.4f}")
print(f"Orders executed: {len(ORDER_DATES)}")
# The two dates used to be printed fused together; separate them with the
# same "->" arrow the regime-window printouts use.
print(f"Period: {start_dt.date()} -> {end_dt.date()}")
print("\n=== SIPXAR LEDGER (LAST 12 ROWS) ===")
print(sipxar_ledger.tail(12))
print("\n=== EQUITY WEIGHT DISTRIBUTION ===")
print(sipxar_ledger["Equity_Weight"].describe())
# XIRR values are fractions; scale to percent for display.
print("\n=== STEP 1: XIRR COMPARISON ===")
print(f"SIPXAR XIRR : {sipxar_xirr*100:.2f}%")
print(f"NIFTY SIP XIRR : {nifty_xirr*100:.2f}%")
print(f"60/40 SIP XIRR : {static_xirr*100:.2f}%")
# =========================================================
# EXPORT
# =========================================================
# Workbook with the full per-order ledger plus a one-row-per-year roll-up.
output_file = "SIPXAR_Momentum_SIP.xlsx"
with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    sipxar_ledger.to_excel(writer, sheet_name="Ledger")

    # Tag each order with its calendar year, then roll up: contributions are
    # summed, while the running totals take their year-end (last) value.
    yearly = sipxar_ledger.assign(Year=sipxar_ledger.index.year)
    yearly_summary = yearly.groupby("Year").agg(
        Cash_Added=("Cash_Added", "sum"),
        Total_Invested=("Total_Invested", "last"),
        Portfolio_Value=("Portfolio_Value", "last"),
        Unrealized_PnL=("Unrealized_PnL", "last"),
    )
    yearly_summary.to_excel(writer, sheet_name="Yearly_Summary")
print(f"\nExcel exported successfully: {output_file}")
# =========================================================
# PHASE 2: CRASH & SIDEWAYS REGIME BACKTEST (ADAPTED TO DAILY INDEX)
# =========================================================
def rolling_cagr(series: pd.Series, periods: int, years: float):
    """Annualised growth rate of ``series`` over a trailing window.

    Args:
        series: price (or value) series.
        periods: look-back length in rows.
        years: length of that look-back in years, used to annualise.

    Returns:
        Series of ``(series / series.shift(periods)) ** (1 / years) - 1``;
        the first ``periods`` entries are NaN.
    """
    growth = series.div(series.shift(periods))
    exponent = 1 / years
    return growth.pow(exponent) - 1
def window_xirr_from_value(value_df, start, end, sip_amount):
    """XIRR of a SIP value curve restricted to the window ``start``..``end``.

    The terminal inflow is the whole portfolio value at the end of the
    window, and that value includes units bought before the window opened.
    Those holdings are therefore booked as an opening outflow on the first
    window date (value on that date minus that day's contribution).
    Otherwise only the in-window contributions would count as capital and
    any window that starts mid-history would show an absurdly high return.
    For a window starting on the very first order the opening balance is
    zero, so nothing changes.

    Args:
        value_df: frame indexed by date whose first column is the portfolio
            value after each order.
        start, end: inclusive window bounds (index labels).
        sip_amount: cash contributed on every order date.

    Returns:
        The window XIRR from ``xirr``, or ``nan`` when the window contains
        fewer than six orders.
    """
    df = value_df.loc[start:end]
    if len(df) < 6:
        return np.nan

    cashflows = []
    # Value carried into the window: first-row value before that row's contribution.
    opening_holdings = float(df.iloc[0, 0]) - sip_amount
    if opening_holdings > 0:
        cashflows.append((df.index[0], -opening_holdings))
    cashflows.extend((d, -sip_amount) for d in df.index)
    cashflows.append((df.index[-1], float(df.iloc[-1, 0])))
    return xirr(cashflows)
def sip_max_drawdown(ledger):
    """Locate the deepest peak-to-trough decline of ``Portfolio_Value``.

    Args:
        ledger: frame with a ``"Portfolio_Value"`` column, indexed by date.

    Returns:
        Dict with ``"Peak"`` (index label of the highest value up to and
        including the trough), ``"Trough"`` (label of the largest drawdown)
        and ``"Max_Drawdown"`` (that drawdown as a non-positive fraction).
    """
    value = ledger["Portfolio_Value"]
    running_high = value.cummax()
    drawdown = value.div(running_high) - 1

    trough_at = drawdown.idxmin()
    # The peak is searched only in the history leading up to the trough.
    peak_at = value.loc[:trough_at].idxmax()

    return {
        "Peak": peak_at,
        "Trough": trough_at,
        "Max_Drawdown": float(drawdown.min()),
    }
def worst_rolling_xirr(ledger, periods: int):
    """Find the rolling window of ``periods`` orders with the lowest XIRR.

    Each window spans ``periods + 1`` consecutive ledger rows. Its terminal
    inflow is the full portfolio value at the window's end, which includes
    units bought before the window began. Those holdings are booked as an
    opening outflow on the first window date (value on that date minus that
    day's contribution). Otherwise every window after the first would treat
    earlier holdings as free gains and could never be the "worst" one.

    Args:
        ledger: frame indexed by date with ``"Cash_Added"`` and
            ``"Portfolio_Value"`` columns.
        periods: window length in order periods.

    Returns:
        ``{"Start", "End", "XIRR"}`` for the worst window, or ``None`` when
        no window could be scored.
    """
    dates = ledger.index
    min_rows = max(6, periods // 2)
    worst = None

    for i in range(len(dates) - periods):
        start, end = dates[i], dates[i + periods]
        window = ledger.loc[start:end]
        if len(window) < min_rows:
            continue

        cash = window["Cash_Added"]
        values = window["Portfolio_Value"]

        cashflows = []
        # Value carried into the window: first-row value before that row's contribution.
        opening_holdings = float(values.iloc[0]) - float(cash.iloc[0])
        if opening_holdings > 0:
            cashflows.append((start, -opening_holdings))
        cashflows.extend((d, -float(c)) for d, c in cash.items())
        cashflows.append((end, float(values.iloc[-1])))

        try:
            rx = xirr(cashflows)
        except (ArithmeticError, ValueError):
            # Best effort: a window whose rate cannot be computed is skipped.
            continue
        if np.isnan(rx):
            # An undefined rate must not become the running minimum: every
            # later "rx < nan" comparison would be False.
            continue
        if worst is None or rx < worst["XIRR"]:
            worst = {"Start": start, "End": end, "XIRR": rx}

    return worst
def _true_runs(mask):
    """Return ``(first_label, last_label)`` for every run of consecutive True values in ``mask``."""
    windows = []
    # A new run id starts wherever the flag differs from the previous row.
    run_ids = (mask != mask.shift()).cumsum()
    for _, block in mask.groupby(run_ids):
        if block.iloc[0]:
            windows.append((block.index[0], block.index[-1]))
    return windows


# 1) Identify crash windows from NIFTY drawdowns (daily)
_in_sip_range = prices.index >= SIP_START_DATE
if SIP_END_DATE is not None:
    _in_sip_range = _in_sip_range & (prices.index <= SIP_END_DATE)
nifty_price = prices["NIFTY"].loc[_in_sip_range]
# Drawdown is measured against the running high inside the SIP window only.
peak = nifty_price.cummax()
drawdown = nifty_price / peak - 1.0
CRASH_THRESHOLD = -0.15
in_crash = drawdown <= CRASH_THRESHOLD
crash_windows = _true_runs(in_crash)
print("\n=== CRASH WINDOWS (NIFTY DD <= -15%) ===")
for s, e in crash_windows:
    print(s.date(), "->", e.date())

# 2) Sideways windows using ~36M rolling CAGR on daily data
ROLL_MONTHS = 36
SIDEWAYS_CAGR = 0.06
ROLL_DAYS = ROLL_MONTHS * TRADING_DAYS_PER_MONTH
# Compute the trailing CAGR on the full price history and only then restrict
# it to the SIP window. Computing it on the already-truncated series left the
# first ROLL_DAYS sessions of the window as NaN, so no sideways regime could
# ever be detected there even though earlier prices are loaded.
cagr_36 = rolling_cagr(
    prices["NIFTY"], periods=ROLL_DAYS, years=ROLL_MONTHS / 12.0
).loc[nifty_price.index]
# NaN (not enough history) compares False and is therefore never "sideways".
in_sideways = cagr_36 <= SIDEWAYS_CAGR
sideways_windows = _true_runs(in_sideways)
print("\n=== SIDEWAYS WINDOWS (~36M CAGR <= 6%) ===")
for s, e in sideways_windows:
    print(s.date(), "->", e.date())
def _nearest_ledger_date(stamp):
    """Snap ``stamp`` to the closest order date present in ``sipxar_ledger``."""
    position = sipxar_ledger.index.get_indexer([stamp], method="nearest")[0]
    return sipxar_ledger.index[position]


# 3) Score each regime window using order-based ledgers
rows = []
regime_windows = {"CRASH": crash_windows, "SIDEWAYS": sideways_windows}
for label, windows in regime_windows.items():
    for s, e in windows:
        # Regime bounds are daily dates; orders exist only every N sessions.
        s2 = _nearest_ledger_date(s)
        e2 = _nearest_ledger_date(e)
        # Calendar months touched by the window, counting both end months.
        months_like = (e2.year - s2.year) * 12 + (e2.month - s2.month) + 1
        sipxar_curve = sipxar_ledger[["Portfolio_Value"]]
        rows.append({
            "Regime": label,
            "Start": s2.date(),
            "End": e2.date(),
            "MonthsLike": months_like,
            "SIPXAR_XIRR": window_xirr_from_value(sipxar_curve, s2, e2, SIP_AMOUNT_PER_ORDER),
            "NIFTY_SIP_XIRR": window_xirr_from_value(nifty_sip, s2, e2, SIP_AMOUNT_PER_ORDER),
            "STATIC_60_40_XIRR": window_xirr_from_value(static_sip, s2, e2, SIP_AMOUNT_PER_ORDER),
        })
regime_results = pd.DataFrame(rows)

print("\n=== REGIME PERFORMANCE SUMMARY ===")
if regime_results.empty:
    print("No regime windows detected (check thresholds / data range).")
else:
    print(regime_results.to_string(index=False))
# =========================================================
# METRIC 1: TIME UNDERWATER
# =========================================================
# An order period counts as underwater when the portfolio is worth less than
# the cash put in so far. NOTE: this adds an "Underwater" column to the ledger
# after the Excel export above has already been written.
sipxar_ledger["Underwater"] = (sipxar_ledger["Portfolio_Value"] < sipxar_ledger["Total_Invested"])
periods_underwater = int(sipxar_ledger["Underwater"].sum())
print("\n=== TIME UNDERWATER ===")
# Counts are in order periods (ledger rows), not calendar days.
print(f"Periods underwater: {periods_underwater} / {len(sipxar_ledger)}")
print(f"% Time underwater : {periods_underwater / len(sipxar_ledger) * 100:.1f}%")
# =========================================================
# METRIC 2: SIP-AWARE MAX DRAWDOWN
# =========================================================
# Deepest peak-to-trough fall of each ledger's Portfolio_Value.
dd_sipxar, dd_nifty, dd_static = (
    sip_max_drawdown(ledger) for ledger in (sipxar_ledger, nifty_ledger, static_ledger)
)
print("\n=== SIP-AWARE MAX DRAWDOWN ===")
drawdown_report = (
    ("SIPXAR", dd_sipxar),
    ("NIFTY SIP", dd_nifty),
    ("60/40 SIP", dd_static),
)
for name, dd in drawdown_report:
    peak_day = dd["Peak"].date()
    trough_day = dd["Trough"].date()
    depth_pct = dd["Max_Drawdown"] * 100
    print(f"{name:10s} | Peak: {peak_day} | Trough: {trough_day} | DD: {depth_pct:.2f}%")
# =========================================================
# METRIC 3: WORST ROLLING 24M SIP XIRR (ORDER-BASED)
# =========================================================
# Convert 24 months to "order periods" approximately
# Translate "24 months" into a number of order periods for the current
# order frequency, never going below the 6-row minimum the scorer requires.
ORDERS_PER_MONTH = TRADING_DAYS_PER_MONTH / ORDER_EVERY_N
ROLL_24M_PERIODS = int(round(24 * ORDERS_PER_MONTH))
ROLL_24M_PERIODS = max(6, ROLL_24M_PERIODS)
worst_24_sipxar = worst_rolling_xirr(sipxar_ledger, ROLL_24M_PERIODS)
worst_24_nifty = worst_rolling_xirr(nifty_ledger, ROLL_24M_PERIODS)
worst_24_static = worst_rolling_xirr(static_ledger, ROLL_24M_PERIODS)
print("\n=== WORST ROLLING ~24M XIRR (by order periods) ===")
print(f"Using {ROLL_24M_PERIODS} periods (~24 months) given N={ORDER_EVERY_N}")
for name, w in [("SIPXAR", worst_24_sipxar), ("NIFTY SIP", worst_24_nifty), ("60/40 SIP", worst_24_static)]:
    # worst_rolling_xirr returns None when no window could be scored.
    if not w or pd.isna(w.get("XIRR", np.nan)):
        print(f"{name:10s} | insufficient data")
        continue
    # Start and end dates used to be printed fused together; separate them
    # with the same "->" arrow the regime-window printouts use.
    print(f"{name:10s} | {w['Start'].date()} -> {w['End'].date()} | {w['XIRR']*100:.2f}%")
# =========================================================
# METRIC 4: PnL VOLATILITY (PER-ORDER)
# =========================================================
# Period_PnL is the value change between consecutive orders net of the cash
# added; the first row is 0.0 by construction and is included in both statistics.
period_pnl = sipxar_ledger["Period_PnL"]
pnl_std = float(period_pnl.std())
pnl_mean = float(period_pnl.mean())
print("\n=== PnL VOLATILITY (PER ORDER PERIOD) ===")
print(f"Avg Period PnL : ₹{pnl_mean:,.2f}")
print(f"PnL Std Dev : ₹{pnl_std:,.2f}")
# Expressed relative to one order's contribution, not to portfolio value.
print(f"Volatility % : {pnl_std / SIP_AMOUNT_PER_ORDER * 100:.1f}% of per-order SIP")
# =========================================================
# SIP GRAPH: INVESTED vs PORTFOLIO VALUE (HEADLESS SAFE)
# =========================================================
import matplotlib
# Select the non-interactive Agg backend before pyplot is imported so the
# figure can be rendered to a file without a display.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
# Alias column used only for plotting; it duplicates Total_Invested.
sipxar_ledger["Invested_Capital"] = sipxar_ledger["Total_Invested"]
plt.figure(figsize=(10, 5))
# Two lines on one axis: market value versus cumulative cash contributed.
plt.plot(sipxar_ledger.index, sipxar_ledger["Portfolio_Value"], label="Portfolio Value")
plt.plot(sipxar_ledger.index, sipxar_ledger["Invested_Capital"], label="Total Invested")
plt.xlabel("Date")
plt.ylabel("Value (₹)")
plt.title(f"SIPXAR SIP Performance (Every {ORDER_EVERY_N} Trading Days)")
plt.legend()
plt.tight_layout()
plt.savefig("sipxar_performance.png", dpi=150)
# Close the figure explicitly once it has been saved.
plt.close()
print("Plot saved: sipxar_performance.png")