Hi, I'm trying to use Auto_HMM to find the upward, downward, and stable states in a time series of a stock's closing prices. The model is supposed to learn to distinguish which periods have an upward trend, which have a downward trend, and which have no specific trend (a roughly flat period, which I call "monotonic"). Below you can see the Adj Close price data for the S&P 500 index between '2010-07-01' and '2011-11-01'.
The three periods are each marked by a line of a different color. I want to feed this data to the HMM and get the state that corresponds to each period. Here is what I have tried so far:
# Download the S&P 500 index (^GSPC) adjusted close series and save part of it
# as the CSV file that is later fed to the HMM.
import pandas as pd
from pandas_datareader import data as web
import numpy as np
# 'Adj Close' column for 2010-07-01 .. 2011-11-01, renamed to 'close'.
x = web.get_data_yahoo('^GSPC' , start = '2010-07-01' , end = '2011-11-01')['Adj Close'].rename('close')
# Keep only the rows from position 250 onward and write them to CSV.
# NOTE(review): Series.to_csv also writes the index by default, so the file
# presumably contains a date column next to 'close' -- confirm this is the
# layout Supervised_HMM expects.
x[250:].to_csv("the path")
# ------------------------------------------------------------------------------------------------
# then, I used your code:
# Configure Auto_HMM's Supervised_HMM, run its model selection on the saved
# CSV, and print the elapsed wall-clock time.
from Hidden_Markov_Model import *
from Hidden_Markov_Model.HMM import *
import time
# Start of the timed region; paired with END below.
Start=time.time()
# The settings below are passed positionally to Supervised_HMM in this order.
# NOTE(review): their meanings are inferred from the names and from the
# traceback (Cov_Type -> covariance_type, Max_state -> n_components,
# Iter -> n_iter of hmmlearn's GMMHMM) -- confirm against the Auto_HMM docs.
Train_ratio=0.1
Cov_Type='diag'
Max_state=3
Max_mixture=2
Iter=1000
Feat=1
# I have 140 records, so I guess N should be equal to 140.
N=140
# NOTE(review): the traceback shows Best_States building the lengths list as
# [T] * (number of training rows) and flattening the training data into one
# column. hmmlearn then rejects the fit because sum(lengths) = 14 * 50 is
# larger than the 14 samples available. Presumably each row of Data must hold
# one sequence of T observations, so a single 140-point series does not match
# N=140, T=50 -- TODO confirm the expected Data layout.
T=50
flag=1
Path="the path"
# Load the CSV written by the download step.
Data=pd.read_csv(Path)
Exam_HMM=Supervised_HMM(Train_ratio,Cov_Type,Max_state,Max_mixture,Iter,Feat,N,T,Data,flag)
# This is the call that raises the ValueError in the traceback.
Exam_HMM.Best_States()
END=time.time()
print('Total Time Takes in seconds',END-Start)
But I got this error:
One mixture component is over 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_7616/86438458.py in <module>
17 Data=pd.read_csv(Path)
18 Exam_HMM=Supervised_HMM(Train_ratio,Cov_Type,Max_state,Max_mixture,Iter,Feat,N,T,Data,flag)
---> 19 Exam_HMM.Best_States()
20 END=time.time()
21 print('Total Time Takes in seconds',END-Start)
c:\Users\Shayan\Desktop\Hidden_Markov_Model\HMM.py in Best_States(self)
146 self.Len=[self.T for ii in range(0,self.Data_train.shape[0])] # Lengths must be list
147 self.Train_Data = np.array(self.Data_train).reshape((-1,1)) # Convert to numpy array with one column
--> 148 self.AIC_BIC()
149 self.Best_BIC()
150
c:\Users\Shayan\Desktop\Hidden_Markov_Model\HMM.py in AIC_BIC(self)
176 # self.num_params=self.Max_state*(self.Max_state-1)+ self.Max_state*(ii-1)+(ii*self.Max_state)*self.Feat+((self.Feat**2+self.Feat)/2)*ii*self.Max_state # Full Covariance
177 self.num_params = self.Max_state*(self.Max_state-1)+ self.Max_state*(ii-1)+(ii*self.Max_state)*self.Feat+(self.Max_state*ii*self.Feat) # Diagonal
--> 178 Model=GMMHMM(n_components=self.Max_state,n_mix=ii,covariance_type=self.Cov_Type,params='stmcw', init_params='stmcw',tol=pow(10,-5),n_iter=self.Iter).fit(self.Train_Data,self.Len)
179 AIC.append(-2 * Model.score(self.Train_Data) + 2 * self.num_params)
180 BIC.append(-2 * Model.score(self.Train_Data) + self.num_params * np.log(self.Train_Data.shape[0]))
~\Anaconda3\envs\Python3.10\lib\site-packages\hmmlearn\base.py in fit(self, X, lengths)
504 stats = self._initialize_sufficient_statistics()
505 curr_log_prob = 0
--> 506 for sub_X in _utils.split_X_lengths(X, lengths):
507 lattice, log_prob, posteriors, fwdlattice, bwdlattice = \
508 impl(sub_X)
~\Anaconda3\envs\Python3.10\lib\site-packages\hmmlearn\_utils.py in split_X_lengths(X, lengths)
14 n_samples = len(X)
15 if cs[-1] > n_samples:
---> 16 raise ValueError(
17 f"more than {n_samples} samples in lengths array {lengths}")
18 elif cs[-1] != n_samples:
ValueError: more than 14 samples in lengths array [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
Can you please help me? How can I achieve my goal?
The expected output in my case:
[0,0,0,0,0,0,0...,0,1,1,1,1,1,1,1,1,...,1,2,2,2,2,2,2,2...,2]
I expect the HMM to find those three states (downward trend, monotonic period, upward trend).