01-02 Working with multiple stocks

处理多只股票

  • 创建空的DataFrame
#Build a DataFrame in pandas
import pandas as pd
def test_run():
    """Build an empty DataFrame indexed by dates from 2010-01-22 to 2010-01-26.

    The frame has an index but no columns. It is created and then discarded;
    nothing is returned or printed.
    """
    # Date index spanning the requested range
    date_index = pd.date_range('2010-01-22', '2010-01-26')

    # Column-less frame keyed by those dates
    empty_frame = pd.DataFrame(index=date_index)


if __name__ == "__main__":
    test_run()
  • 连结SPY数据
#SPY(标普500指数) 可以用来参考是不是交易日
import pandas as pd
def test_run():
    """Join SPY adjusted-close prices onto a 2010-01-22 .. 2010-01-26 date index.

    Reads "data/SPY.csv", keeps the 'Adj Close' column (renamed to 'SPY'),
    joins it onto an empty date-indexed frame and drops the rows that contain
    NaN. The resulting frame is not returned or printed.
    """
    # Empty frame whose index is the requested date range
    date_index = pd.date_range('2010-01-22', '2010-01-26')
    df1 = pd.DataFrame(index=date_index)

    # Load SPY into a temporary frame, indexed by its parsed 'Date' column;
    # only the 'Date' and 'Adj Close' columns are read
    spy_prices = pd.read_csv(
        "data/SPY.csv",
        index_col="Date",
        parse_dates=True,
        usecols=['Date', 'Adj Close'],
        na_values=['nan'],
    )

    # Name the price column after the symbol so later joins do not clash
    spy_prices = spy_prices.rename(columns={'Adj Close': 'SPY'})

    # Join the two frames, then drop rows with NaN values.
    # df1.join(spy_prices, how='inner') is noted as an alternative way to
    # achieve this.
    df1 = df1.join(spy_prices).dropna()
  • 读取更多股票数据
# Read in more stocks and join each one onto df1.
# This fragment relies on `pd` and on `df1` (the date-indexed frame that
# already holds the SPY column) being defined by the preceding code.
symbols = ['GOOG', 'IBM', 'GLD']
for symbol in symbols:
    # Load this symbol's adjusted close, indexed by its parsed 'Date' column
    df_temp = pd.read_csv("data/{}.csv".format(symbol),
                          index_col = 'Date',
                          parse_dates = True,
                          usecols = ['Date', 'Adj Close'],
                          na_values = ['nan'])
    # Rename the price column to the symbol to prevent a column-name clash
    df_temp = df_temp.rename(columns = {'Adj Close' : symbol})
    # join() uses its default how = 'left', so df1's index is preserved
    df1 = df1.join(df_temp)
  • 读取数据的实用函数
import os
import pandas as pd

def symbol_to_path(symbol, base_dir = "data"):
    """Return the CSV file path for a ticker symbol.

    The file name is the string form of `symbol` followed by ".csv",
    joined onto `base_dir`.
    """
    file_name = str(symbol) + ".csv"
    return os.path.join(base_dir, file_name)

def get_data(symbols, dates):
    """Read stock data (Adj Close) for the given symbols from CSV files.

    Args:
        symbols: ticker symbols to load. 'SPY' is added at the front when it
            is absent; the caller's sequence is not modified.
        dates: index for the resulting frame (e.g. from pd.date_range).

    Returns:
        A DataFrame indexed by `dates` with one column per symbol, keeping
        only the rows for which the SPY column has a value.
    """
    df = pd.DataFrame(index = dates)

    # Work on a copy so that adding SPY does not mutate the caller's list
    symbols = list(symbols)
    if 'SPY' not in symbols:
        # Add SPY for reference if absent
        symbols.insert(0, 'SPY')

    for symbol in symbols:
        # Load this symbol's adjusted close, indexed by its parsed 'Date' column
        df_temp = pd.read_csv(symbol_to_path(symbol),
                              index_col = 'Date',
                              parse_dates = True,
                              usecols = ['Date', 'Adj Close'],
                              na_values = ['nan'])
        # Rename the price column to the symbol to prevent a column-name clash
        df_temp = df_temp.rename(columns = {'Adj Close' : symbol})
        df = df.join(df_temp)
        if symbol == 'SPY':  # drop dates SPY did not trade
            df = df.dropna(subset=["SPY"])

    return df
#More slicing
def test_run():
    """Load GOOG, IBM and GLD prices for 2010 and print the January rows."""
    # Define a date range
    dates = pd.date_range('2010-01-01', '2010-12-31')

    # Choose stock symbols to read
    # (SPY is expected to be added by get_data())
    symbols = ['GOOG', 'IBM', 'GLD']

    # Get stock data
    df = get_data(symbols, dates)

    # Slice by row range (dates) using the label-based DataFrame.loc[]
    # selector; the original .ix[] indexer was removed in pandas 1.0.
    # print(...) with a single argument also works under Python 2.
    print(df.loc['2010-01-01':'2010-01-31'])
    # the month of January
  • 绘制多只股票的图形
def plot_data(df, title = 'Stock prices'):
    """Plot stock prices with labelled axes and show the figure.

    Args:
        df: object whose .plot(title=...) method is called to draw the chart.
        title: chart title passed to df.plot().
    """
    # df.plot() returns an axis object, used below to set the labels
    ax = df.plot(title = title)
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    # ax = df.plot(title = title, fontsize = 2) can be used to change the font size
    # NOTE(review): `plt` is not imported in the snippets shown; presumably
    # `import matplotlib.pyplot as plt` is needed at module level - confirm.
    plt.show()
  • movement(变动):股票的相对涨跌

  • 标准化 Normalizing
    The best way to normalize price data so that all prices start at 1.0:
    df1 = df1 / df1.ix[0]
    or: df1 = df1 / df1.ix[0,:]  (on pandas >= 1.0, where .ix was removed, use df1.iloc[0])

def normalize_data(df):
    """Normalize the data so that every column starts at 1.0.

    Each column is divided by its own value in the first row.

    Args:
        df: DataFrame of prices.

    Returns:
        A new DataFrame of the same shape; `df` itself is not modified.
    """
    # Select the first row by position. The original `.ix[0,i]` referenced an
    # undefined `i`, and the .ix indexer was removed in pandas 1.0.
    return df / df.iloc[0]
  • 切片和绘制两只股票的图形
def plot_selected(df, columns, start_index, end_index):
    """Plot the chosen columns over the given index range.

    Args:
        df: DataFrame to slice.
        columns: column labels to include in the plot.
        start_index: first row label of the slice.
        end_index: last row label of the slice.
    """
    # Label-based slice of rows and columns; .loc replaces the .ix indexer,
    # which was removed in pandas 1.0.
    # NOTE(review): assumes start_index/end_index are index labels (e.g. date
    # strings) rather than integer positions - confirm against callers.
    plot_data(df.loc[start_index:end_index, columns],
              title = "Selected data")

pandas可处理带有大量统计函数的ndarray

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • 我的朋友不多,但每个都暖到我心里。 超贴心的。
    你好橘子皮阅读 325评论 0 1
  • 今天儿子自己出了口算题,自己完成了所有的作业,因为今天太累也有点不舒服的原因本想不给他检查作业的,可是又一想不行,...
    新的一天从早上开始阅读 71评论 0 1
  • 前几日闲来无事,翻了翻自己以前的朋友圈,有条说“我有难你鼎力相助,你遇困我义不容辞”。虽然已经记不清事情的具体缘由...
    楚舒大人阅读 468评论 5 3
  • Isophie阅读 218评论 0 2