When I shift my time series data, I get some NaNs in the dataframe. The only interpolation method I have found that can replace these NaNs with numbers is 'linear'. However, the NaNs are all replaced by the same number, which isn't desirable.
Is there some way to instead use a different method like 'cubic' or 'quadratic'?
import numpy as np
import pandas as pd
# Original data: ten daily observations of seeded random noise, so the
# printed values are reproducible from run to run.
np.random.seed(0)
days = pd.date_range(start='2015-01-01', end='2015-01-10', freq='1D')
df = pd.DataFrame({'Date': days, 'col1': np.random.randn(len(days))})
df = df.set_index('Date')

# Add lags: shifting by k rows leaves NaN in the first k rows of the new column.
df['lag1'] = df['col1'].shift(1)
df['lag3'] = df['col1'].shift(3)
print(df)
def interp(dfObj, method='linear', **kwargs):
    """Return ``dfObj`` with its NaN values filled by interpolation.

    Args:
        dfObj: A pandas Series or DataFrame.
        method: Interpolation method handed to ``interpolate``. Defaults to
            ``'linear'``, which matches the previous hard-coded behavior.
        **kwargs: Extra keyword arguments forwarded to ``interpolate``.
            ``limit_direction`` defaults to ``'both'`` when not supplied.

    Returns:
        The result of ``dfObj.interpolate(...)`` when ``dfObj`` contains at
        least one NaN; otherwise ``dfObj`` itself, untouched.
    """
    # Collapse the NaN mask to a single bool. ``isna().sum() > 0`` is only a
    # scalar for a Series; for a DataFrame it is a Series and cannot be used
    # directly as an ``if`` condition.
    if not dfObj.isna().values.any():
        return dfObj

    # NOTE(review): SciPy-backed methods such as 'quadratic' or 'cubic'
    # presumably leave leading/trailing NaNs unfilled unless
    # fill_value='extrapolate' is also passed -- confirm against the docs.
    kwargs.setdefault('limit_direction', 'both')
    return dfObj.interpolate(method=method, **kwargs)
# Fill the NaNs that shifting introduced, one lag column at a time.
for lag_col in ('lag1', 'lag3'):
    df[lag_col] = interp(df[lag_col])
print(df)