Create Your Own Abstract Data Reader Object

Based on the stock data sources available to us, we should build our own data reader as an abstract data structure: it hides the data-retrieval process, takes in commands, and returns a standardized data table.

Here are all the dependencies we need:

from enum import Enum
from datetime import datetime, timedelta
import pandas as pd
import time
from IPython.display import clear_output
import tqdm
import requests as re
import json

Two things are worth mentioning: json is used to parse web-request responses so they can be loaded into a DataFrame, and tqdm is a package for displaying progress bars.

Our goal is to build a class that gets the data, so let's build the constructor first. Since getting data from these sources involves many arguments, a practice that works well for me is to initialize those parameters in the constructor.

     def __init__(self,tic_list, output="table", **kwargs):
        self.arg_list = {"freq": 'minutes',"start_date": datetime.now()-timedelta(days =256),"end_date":datetime.now(), "timeframe": 256, "file_name":""}
        
        self.tic_list = tic_list
        self.output = output
        self.arg_list["start_date"] 
        
        
        for key , arg in kwargs.iteritems():
            
            if key in ["freq","start_date","end_date"]:
                self.arg_list[key]=arg
            
            if key in ["timeframe"]:
                self.arg_list[key]=arg
                self.arg_list["start_date"] = datetime.now()-timedelta(days =arg)

In the constructor, I take in a list of tickers and a few data-format settings: the frequency, the start date and the end date. I also set a variable that controls the output type, which gives me the flexibility to choose between data storage options. Later, I will talk about using a MongoDB database to store stock data.

Next comes the most important part: the data query functions.

The first one is the historical data query function, get_ondemand_data()

def get_ondemand_data(self, interval=1, api_key=None):
    """Download historical bars for every ticker in self.tic_list.

    Each ticker is requested from the Barchart getHistory CSV endpoint
    using the "freq", "start_date" and "end_date" settings in
    self.arg_list. A failing request is retried up to three times with a
    10 second pause between attempts; a ticker that never succeeds is
    recorded in self.error as [ticker, 'get_ondemand'] and skipped.

    Args:
        interval: value sent as the endpoint's "interval" query parameter.
        api_key: Barchart API key; when None the key embedded below is used.

    Returns:
        Whatever self.data_output() returns for the combined table.
    """
    if api_key is None:
        # NOTE(review): this credential is committed in source. Prefer
        # passing api_key explicitly and rotating this key.
        api_key = '95b5894daf3abced33fe48e7f265315e'

    max_trials = 3
    retry_wait_seconds = 10

    # This is the required format for datetimes to access the API
    start_date = self.arg_list["start_date"].strftime("%Y%m%d%H%M%S")
    end_date = self.arg_list["end_date"].strftime("%Y%m%d%H%M%S")

    self.result = pd.DataFrame()
    frames = []

    for ticker in tqdm.tqdm(self.tic_list):
        api_url = ('http://marketdata.websol.barchart.com/getHistory.csv?'
                   'key={}&symbol={}&type={}&startDate={}&endDate={}&interval={}'
                   .format(api_key, ticker, self.arg_list["freq"],
                           start_date, end_date, interval))

        for trial in range(1, max_trials + 1):
            try:
                temp = pd.read_csv(api_url, parse_dates=['timestamp'])
                temp.set_index('timestamp', inplace=True)
            except Exception as e:
                # Deliberately broad: the download is best-effort and any
                # failure (network, HTTP, parsing) should trigger a retry.
                print(e)
                print("error occorded in getting data for  {}".format(ticker))
                if trial == max_trials:
                    self.error.append([ticker, 'get_ondemand'])
                else:
                    # Only wait when another attempt will follow.
                    time.sleep(retry_wait_seconds)
            else:
                frames.append(temp)
                clear_output()
                print("Finished {}".format(ticker))
                break

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    if frames:
        self.result = pd.concat(frames)
    return self.data_output()

I won’t go deep into this, but please note that I wrap each data request in a retry loop (up to three attempts) in case a connection error occurs.

The second one is the stock quote function, get_quote()

def get_quote(self):
    """Fetch a price quote for every ticker in self.tic_list.

    Each ticker is requested from the financialmodelingprep price
    endpoint. Newlines and "<pre>" markers are stripped from the response
    text, the remainder is parsed as JSON, and the transposed DataFrame
    built from it is added to the combined table.
    # NOTE(review): the response layout is assumed from the parsing steps
    # in the original code -- confirm against the live API.

    Returns:
        Whatever self.data_output() returns for the combined table.
    """
    self.result = pd.DataFrame()
    frames = []

    for ticker in tqdm.tqdm(self.tic_list):
        profile = "https://financialmodelingprep.com/api/company/price/{}".format(ticker)

        # `re` is the file's alias for the requests package.
        # NOTE(review): verify=False disables TLS certificate checking;
        # drop it if the endpoint presents a valid certificate.
        # The timeout keeps a stalled connection from hanging forever.
        text = re.get(profile, verify=False, timeout=30).text
        text = text.replace("\n", "").replace("<pre>", "")

        quote = json.loads(text)
        frames.append(pd.DataFrame(quote).transpose())

    # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
    if frames:
        self.result = pd.concat(frames)
    return self.data_output()

Lastly, we need to create a function to standardize the output.

def data_output(self):
    """Standardise self.result and deliver it according to self.output.

    The index is moved into a column, the raw columns are renamed
    (symbol -> Ticker, timestamp -> TimeStamp, high/low/open/volume ->
    capitalised), and, when a "close" column is present, "Close" and
    "Return" columns are added. Return is the change in Close from the
    previous row of the same ticker divided by the current Close, so the
    first row of each ticker is NaN.
    # NOTE(review): dividing by the current rather than the previous Close
    # is kept from the original formula -- confirm this is intended.

    Returns:
        The standardised DataFrame when self.output == "table"; None
        otherwise. When self.output == "file" the table is written as CSV
        to self.arg_list["file_name"].
    """
    table = self.result.reset_index()
    table = table.rename(columns={
        'symbol': 'Ticker',
        'timestamp': "TimeStamp",
        "high": "High",
        "low": "Low",
        "open": "Open",
        "volume": "Volume",
    })

    # An empty result (every download failed) or a table without prices
    # has no "close" column; skip the derived columns instead of raising.
    if "close" in table.columns:
        table["Close"] = table["close"]
        if "Ticker" in table.columns:
            # Difference within each ticker so a return never spans two symbols.
            change = table.groupby("Ticker")["Close"].diff(1)
        else:
            change = table["Close"].diff(1)
        table["Return"] = change / table["Close"]

    self.result = table

    if self.output == "table":
        return self.result

    if self.output == "file":
        self.result.to_csv(self.arg_list["file_name"])

Now put them all together. You can save the code in a .py file so that the next time you need it, you can simply import that file as a module.




Create Your Own Abstract Data Reader Object







Based on the stock data sources available to us, we should build our own data reader as an abstract data structure: it hides the data-retrieval process, takes in commands, and returns a standardized data table.

In [34]:
from enum import Enum
from datetime import datetime, timedelta
import pandas as pd
import time
from IPython.display import clear_output
import tqdm
import requests as re
import json
In [ ]:
class get_stock_data(object):
    """Fetch stock data for a list of tickers and return it in one table layout.

    The query settings live in self.arg_list; the most recent query result
    is kept in self.result; tickers whose download failed are collected in
    self.error as [ticker, method_name] pairs.
    """

    # NOTE(review): this credential is committed in source. Prefer passing
    # api_key to get_ondemand_data() and rotating this key.
    _DEFAULT_API_KEY = '95b5894daf3abced33fe48e7f265315e'
    _MAX_TRIALS = 3
    _RETRY_WAIT_SECONDS = 10
    _REQUEST_TIMEOUT_SECONDS = 30

    def __init__(self, tic_list, output="table", **kwargs):
        """Store the ticker list and the query settings.

        Args:
            tic_list: list of ticker symbols to query.
            output: "table" makes data_output() return the DataFrame;
                "file" makes it write a CSV to arg_list["file_name"].
            **kwargs: optional settings. Recognised keys are "freq",
                "start_date", "end_date", "file_name" and "day_range"
                (alias "timeframe"): a number of days that moves
                start_date to that many days before now. Any other key
                is ignored.
        """
        # Take one timestamp so start_date and end_date are exactly
        # `day_range` days apart.
        now = datetime.now()
        self.arg_list = {
            "freq": 'minutes',
            "start_date": now - timedelta(days=256),
            "end_date": now,
            "day_range": 256,
            "file_name": "",
        }

        self.tic_list = tic_list
        self.output = output
        self.error = []
        self.result = pd.DataFrame()

        # .items() exists on both Python 2 and 3; iteritems() is Python 2 only.
        for key, arg in kwargs.items():
            if key in ("freq", "start_date", "end_date", "file_name"):
                self.arg_list[key] = arg
            elif key in ("timeframe", "day_range"):
                # The default dict advertises "day_range" while earlier code
                # only honoured "timeframe"; both spellings are accepted.
                self.arg_list[key] = arg
                self.arg_list["start_date"] = now - timedelta(days=arg)

    def data_output(self):
        """Standardise self.result and deliver it according to self.output.

        The index is moved into a column, the raw columns are renamed
        (symbol -> Ticker, timestamp -> TimeStamp, high/low/open/volume ->
        capitalised), and, when a "close" column is present, "Close" and
        "Return" columns are added. Return is the change in Close from the
        previous row of the same ticker divided by the current Close, so
        the first row of each ticker is NaN.
        # NOTE(review): dividing by the current rather than the previous
        # Close is kept from the original formula -- confirm it is intended.

        Returns:
            The standardised DataFrame when self.output == "table"; None
            otherwise. When self.output == "file" the table is written as
            CSV to self.arg_list["file_name"].
        """
        table = self.result.reset_index()
        table = table.rename(columns={
            'symbol': 'Ticker',
            'timestamp': "TimeStamp",
            "high": "High",
            "low": "Low",
            "open": "Open",
            "volume": "Volume",
        })

        # An empty result (every download failed) or a quote table has no
        # "close" column; skip the derived columns instead of raising.
        if "close" in table.columns:
            table["Close"] = table["close"]
            if "Ticker" in table.columns:
                # Difference within each ticker so a return never spans
                # two symbols.
                change = table.groupby("Ticker")["Close"].diff(1)
            else:
                change = table["Close"].diff(1)
            table["Return"] = change / table["Close"]

        self.result = table

        if self.output == "table":
            return self.result

        if self.output == "file":
            self.result.to_csv(self.arg_list["file_name"])

    def get_ondemand_data(self, interval=1, api_key=None):
        """Download historical bars for every ticker in self.tic_list.

        Each upper-cased ticker is requested from the Barchart getHistory
        CSV endpoint using the "freq", "start_date" and "end_date"
        settings. A failing request is retried up to three times with a
        10 second pause between attempts; a ticker that never succeeds is
        recorded in self.error as [ticker, 'get_ondemand'] and skipped.

        Args:
            interval: value sent as the endpoint's "interval" parameter.
            api_key: Barchart API key; None selects the embedded default.

        Returns:
            Whatever data_output() returns for the combined table.
        """
        if api_key is None:
            api_key = self._DEFAULT_API_KEY

        # This is the required format for datetimes to access the API
        start_date = self.arg_list["start_date"].strftime("%Y%m%d%H%M%S")
        end_date = self.arg_list["end_date"].strftime("%Y%m%d%H%M%S")

        self.result = pd.DataFrame()
        frames = []

        for ticker in tqdm.tqdm(self.tic_list):
            ticker = ticker.upper()
            api_url = ('http://marketdata.websol.barchart.com/getHistory.csv?'
                       'key={}&symbol={}&type={}&startDate={}&endDate={}&interval={}'
                       .format(api_key, ticker, self.arg_list["freq"],
                               start_date, end_date, interval))

            for trial in range(1, self._MAX_TRIALS + 1):
                try:
                    temp = pd.read_csv(api_url, parse_dates=['timestamp'])
                    temp.set_index('timestamp', inplace=True)
                except Exception as e:
                    # Deliberately broad: the download is best-effort and
                    # any failure (network, HTTP, parsing) triggers a retry.
                    print(e)
                    print("error occorded in getting data for  {}".format(ticker))
                    if trial == self._MAX_TRIALS:
                        self.error.append([ticker, 'get_ondemand'])
                    else:
                        # Only wait when another attempt will follow.
                        time.sleep(self._RETRY_WAIT_SECONDS)
                else:
                    frames.append(temp)
                    clear_output()
                    print("Finished {}".format(ticker))
                    break

        self.result = self._combine(frames)
        return self.data_output()

    def get_quote(self):
        """Fetch a price quote for every ticker in self.tic_list.

        Each upper-cased ticker is requested from the financialmodelingprep
        price endpoint. Newlines and "<pre>" markers are stripped from the
        response text, the remainder is parsed as JSON, and the transposed
        DataFrame built from it is added to the combined table. A ticker
        whose request or parsing fails is recorded in self.error as
        [ticker, 'get_quote'] and skipped.
        # NOTE(review): the response layout is assumed from the parsing
        # steps in the original code -- confirm against the live API.

        Returns:
            Whatever data_output() returns for the combined table.
        """
        self.result = pd.DataFrame()
        frames = []

        for ticker in tqdm.tqdm(self.tic_list):
            ticker = ticker.upper()
            profile = "https://financialmodelingprep.com/api/company/price/{}".format(ticker)

            try:
                # `re` is the file's alias for the requests package.
                # NOTE(review): verify=False disables TLS certificate
                # checking; drop it if the endpoint's certificate is valid.
                text = re.get(profile, verify=False,
                              timeout=self._REQUEST_TIMEOUT_SECONDS).text
                text = text.replace("\n", "").replace("<pre>", "")
                frame = pd.DataFrame(json.loads(text)).transpose()
            except (re.exceptions.RequestException, ValueError) as e:
                # ValueError covers malformed JSON and un-tabular payloads.
                print(e)
                self.error.append([ticker, 'get_quote'])
                continue

            frames.append(frame)

        self.result = self._combine(frames)
        return self.data_output()

    @staticmethod
    def _combine(frames):
        """Stack per-ticker frames into one table; empty DataFrame if none."""
        # One concat instead of repeated DataFrame.append (removed in pandas 2.0).
        return pd.concat(frames) if frames else pd.DataFrame()
            
            
         
        
    
In [38]:
# Build a reader for the single ticker "AAPL", passing day_range=2 as a keyword option.
my = get_stock_data(["AAPL"],day_range=2)
# Run the historical-data query and show the first rows of the returned table.
my.get_ondemand_data().head()
Finished AAPL

100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:03<00:00,  3.24s/it]

Out[38]:
TimeStamp Ticker tradingDay Open High Low close Volume Close Return
0 2018-09-21 13:30:00 AAPL 2018-09-21 220.78 221.0988 220.6601 221.0599 8036258 221.0599 NaN
1 2018-09-21 13:31:00 AAPL 2018-09-21 221.05 221.1700 220.1900 220.4000 153898 220.4000 -0.002994
2 2018-09-21 13:32:00 AAPL 2018-09-21 220.42 220.4600 220.1200 220.2200 160661 220.2200 -0.000817
3 2018-09-21 13:33:00 AAPL 2018-09-21 220.22 220.5650 219.9509 220.4600 257272 220.4600 0.001089
4 2018-09-21 13:34:00 AAPL 2018-09-21 220.47 220.7000 220.2906 220.6101 129467 220.6101 0.000680