import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import edhec_risk_kit_206 as erk
# Load the industry return data through the course helper (requesting n_inds=49)
# and keep only the rows from the label '1960' onward.
# NOTE(review): the label slice assumes a date-like, sorted index — confirm in erk.
df = erk.get_ind_returns(n_inds=49)['1960':]
Annualizing returns allows us to compare returns over different lengths of time. If you are annualizing a return for an investment that lasted less than a year, you can think of the annualized return as the total return you would have earned had you continued seeing that rate of return for a full year.
$r_a = (1 + r_{\text{total}})^{P/n} - 1$
where $P$ is the number of periods per year
and $n$ is the number of periods over which $r_{\text{total}}$ was earned.
If we have a set of 25 monthly returns, $P=12$ and $n=25$
If we have a set of 3 quarterly returns, $P=4$ and $n=3$
If we have an investment that returned 9% over 2 years, $P=1$ and $n=2$
For example, if you have an investment that returned 0.5% over one month, the annualized return would be equal to $(1 + 0.005)^{12} - 1 = 6.17$%.
If you have an investment that returned 1% over two months, the annualized return would be equal to $(1 + 0.01)^{12/2} - 1 = 6.15$%.
If you have an investment that returned 15% over 3 years, the annualized return would be equal to $(1 + 0.15)^{1/3} - 1 = 4.77$%.
If you have an investment that returned 15% over 28 months, the annualized return would be equal to $(1 + 0.15)^{12/28} - 1 = 6.17$%.
def annualize_rets(r, periods_per_year):
    """
    Convert a run of per-period returns into an annualized return.

    The returns are compounded into one total growth factor, which is then
    rescaled from the number of observed periods to `periods_per_year`
    periods. `r` needs a `.prod()` method and a `.shape` (e.g. a pandas
    Series or DataFrame); for a DataFrame the result is one value per column.
    """
    total_growth = (r + 1).prod()
    # The length along the first axis is the number of observed periods.
    observed_periods = r.shape[0]
    years_exponent = periods_per_year / observed_periods
    return total_growth ** years_exponent - 1
Consider a portfolio holding 100% Beer industry stocks.
Calculate the annualized return.
# Annualized return of the 'Beer' column, with 12 periods per year (monthly data).
erk.annualize_rets(df['Beer'], periods_per_year=12)
$\sigma_a = \sigma * \sqrt{P}$
Where $\sigma$ is the standard deviation of the returns and $P$ is the number of periods per year.
$\sigma = \sqrt{\frac{\sum_{i=1}^{N}(x_i-\mu)^2}{N-1}}$
def annualize_vol(r, periods_per_year):
    """
    Scale the per-period volatility of a set of returns to an annual figure.

    Volatility is `r.std()` (pandas' default sample standard deviation),
    multiplied by the square root of the number of periods in a year.
    """
    per_period_vol = r.std()
    scaling = periods_per_year ** 0.5
    return per_period_vol * scaling
Calculate the annualized volatility of the returns
# Annualized volatility of the 'Beer' column, with 12 periods per year (monthly data).
erk.annualize_vol(df['Beer'], periods_per_year=12)
Drawdown is the decline in the value of a portfolio, at a given timestep, from its historical maximum value up to that point, expressed as a percentage of that maximum.
def drawdown(return_series: pd.Series):
    """
    Build a drawdown table from a time series of asset returns.

    Returns a DataFrame with three columns:
      "Wealth"        - value of 1000 units compounded through the returns,
      "Previous Peak" - running maximum of that wealth index,
      "Drawdown"      - fractional distance of wealth below the running peak
                        (zero at a peak, negative otherwise).
    """
    growth_factors = 1 + return_series
    wealth = 1000 * growth_factors.cumprod()
    running_peak = wealth.cummax()
    below_peak = (wealth - running_peak) / running_peak
    columns = {
        "Wealth": wealth,
        "Previous Peak": running_peak,
        "Drawdown": below_peak,
    }
    return pd.DataFrame(columns)
Maximum drawdown over the course of an investment.
# Maximum drawdown: the minimum of the 'Drawdown' column with its sign flipped
# (drawdowns are presumably <= 0, so this reads as a positive loss).
erk.drawdown(df['Beer'])['Drawdown'].min()*-1
# Index label at which that minimum 'Drawdown' value occurs.
erk.drawdown(df['Beer'])['Drawdown'].idxmin()
Semideviation is the standard deviation among the subset of negative asset returns.
def semideviation(r):
    """
    Return the negative semideviation of r.

    For a Series this is the population standard deviation (ddof=0) of only
    the strictly negative values. A DataFrame is handled column by column.
    Any other input type raises a TypeError.
    """
    if isinstance(r, pd.DataFrame):
        # Apply this same function to each column in turn.
        return r.aggregate(semideviation)
    if not isinstance(r, pd.Series):
        raise TypeError("Expected r to be a Series or DataFrame")
    downside = r[r < 0]
    return downside.std(ddof=0)
What was the semideviation of the return distribution?
# Semideviation of the 'Beer' column of returns.
erk.semideviation(df['Beer'])
The conditional value at risk is the expected loss given that the loss is at least as large as the value at risk.
$\text{CVaR} = -\text{E}(R|R \leq - \text{VaR})$
def cvar_historic(r, level=5):
    """
    Compute the historic Conditional VaR of a Series or DataFrame.

    For a Series, the result is the negated mean of the observations that are
    at or below the negated historic VaR at the given level. A DataFrame is
    handled column by column with the same level. Any other input type raises
    a TypeError.
    """
    if isinstance(r, pd.DataFrame):
        return r.aggregate(cvar_historic, level=level)
    if not isinstance(r, pd.Series):
        raise TypeError("Expected r to be a Series or DataFrame")
    # var_historic reports a positive loss; negate it to get a return threshold.
    cutoff = -var_historic(r, level=level)
    tail = r[r <= cutoff]
    return -tail.mean()
# Historic CVaR of the 'Beer' column at level=5
# (presumably the worst 5% of observations — confirm against erk.var_historic).
erk.cvar_historic(df['Beer'], level=5)
The Sharpe ratio for an investment is its excess return divided by the standard deviation of its returns.
def sharpe_ratio(r, riskfree_rate, periods_per_year):
    """
    Compute the annualized Sharpe ratio of a set of returns.

    The annual risk-free rate is first converted to a per-period rate, which
    is subtracted from each return. The annualized excess return is then
    divided by the annualized volatility of the raw returns.
    """
    # Per-period rate that compounds to the annual risk-free rate.
    per_period_rf = (1 + riskfree_rate) ** (1 / periods_per_year) - 1
    annual_excess = annualize_rets(r - per_period_rf, periods_per_year)
    annual_vol = annualize_vol(r, periods_per_year)
    return annual_excess / annual_vol
# Sharpe ratio of the 'Beer' column with a 2% annual risk-free rate and 12 periods per year.
erk.sharpe_ratio(df['Beer'], riskfree_rate=0.02, periods_per_year=12)
Skewness measures the asymmetry of a probability distribution. It is the third standardized moment of a random variable.
Normal distributions have a skewness of 0.
Example of a negative and positive skew:
def skewness(r):
    """
    Compute the skewness of a Series or DataFrame.

    A stand-in for scipy.stats.skew(): the mean cubed deviation from the mean
    divided by the cube of the population standard deviation.
    Returns a float for a Series, or a Series with one value per column.
    """
    deviations = r - r.mean()
    # ddof=0 gives the population (not sample) standard deviation.
    population_sigma = r.std(ddof=0)
    third_moment = (deviations ** 3).mean()
    return third_moment / population_sigma ** 3
What is the skewness of this return distribution?
# Skewness of the 'Beer' column of returns.
erk.skewness(df['Beer'])
Which industry has the most negatively skewed return distribution?
# Print the column label whose skewness is the smallest across all columns of df.
print(erk.skewness(df).idxmin())
Kurtosis is a measure of the size of the tail of a probability distribution. It is the fourth standardized moment of a random variable.
Normal distributions have a kurtosis of 3.
def kurtosis(r):
    """
    Compute the kurtosis of a Series or DataFrame.

    A stand-in for scipy.stats.kurtosis(): the mean fourth-power deviation
    from the mean divided by the fourth power of the population standard
    deviation. This is the raw kurtosis; 3 is not subtracted.
    Returns a float for a Series, or a Series with one value per column.
    """
    deviations = r - r.mean()
    # ddof=0 gives the population (not sample) standard deviation.
    population_sigma = r.std(ddof=0)
    fourth_moment = (deviations ** 4).mean()
    return fourth_moment / population_sigma ** 4
What is the kurtosis of this return distribution?
# Kurtosis of the 'Beer' column of returns.
erk.kurtosis(df['Beer'])
Which industry had a returns distribution with the largest kurtosis?
# Print the column label whose kurtosis is the largest across all columns of df.
print(erk.kurtosis(df).idxmax())
Test whether a distribution of data has a skewness and kurtosis that indicate the data matches a normal distribution.
Calculate the test statistic, $JB = \frac{n}{6}(S^2 + \frac{1}{4}(K-3)^2)$, and choose a significance level to test at.
def is_normal(r, level=0.01):
    """
    Apply the Jarque-Bera test to decide whether returns look normal.

    Parameters:
        r: a Series (or other 1-D data accepted by scipy.stats.jarque_bera),
           or a DataFrame, in which case each column is tested separately.
        level: significance level of the test; 0.01 (1%) by default.

    Returns:
        True if the hypothesis of normality is accepted (p-value > level),
        False otherwise. For a DataFrame, a Series with one boolean per column.
    """
    if isinstance(r, pd.DataFrame):
        # Forward `level` so each column is tested at the caller's threshold
        # instead of silently falling back to the 0.01 default.
        return r.aggregate(is_normal, level=level)
    _statistic, p_value = scipy.stats.jarque_bera(r)
    return p_value > level
Do these monthly returns follow the normal distribution?
# Normality check on the 'Beer' column at the 1% level (level=0.01).
erk.is_normal(df['Beer'], level=0.01)
def var_gaussian(r, level=5, modified=False):
    """
    Return the parametric Gaussian VaR of a Series or DataFrame.

    `level` is a percentage (5 means the 5% quantile). When `modified` is
    True, the Gaussian z-score is adjusted with the Cornish-Fisher expansion
    using the observed skewness and kurtosis of `r`. The result is reported
    as a positive number for a loss.
    """
    # z-score of the requested quantile under a normal distribution
    z = norm.ppf(level / 100)
    if modified:
        # Cornish-Fisher correction terms from observed skewness and kurtosis
        s = skewness(r)
        k = kurtosis(r)
        skew_term = (z**2 - 1)*s/6
        kurt_term = (z**3 - 3*z)*(k - 3)/24
        skew_sq_term = (2*z**3 - 5*z)*(s**2)/36
        z = z + skew_term + kurt_term - skew_sq_term
    location = r.mean()
    population_sigma = r.std(ddof=0)
    return -(location + z*population_sigma)
# Gaussian VaR of the 'Beer' column at level=1 (the 1% level).
erk.var_gaussian(df['Beer'], level=1)
# The same VaR with modified=True (the Cornish-Fisher adjusted variant).
erk.var_gaussian(df['Beer'], level=1, modified=True)
# Annualized return per industry (12 periods per year), computed once and
# reused below for the extremes and for the chart.
ann_rets = erk.annualize_rets(df, periods_per_year=12)
ind_highest_ret = ann_rets.idxmax()
ind_lowest_ret = ann_rets.idxmin()
print('Industry with highest annualized return: {0}'.format(ind_highest_ret))
print('Industry with lowest annualized return: {0}'.format(ind_lowest_ret))
# Bar chart of every industry's annualized return, highest first.
ann_rets.sort_values(ascending=False).plot.bar()
plt.show()
# Summary statistics for every column of df, as produced by the course helper.
erk.summary_stats(df)