Read S&P 500 list of companies from Wikipedia and retrieve stock data using yfinance


Load the list of S&P 500 companies from Wikipedia

import pandas as pd

def load_data(url):
    """Parse the HTML tables found at ``url`` into DataFrames.

    The first row of each table is used as its column header.

    Args:
        url: Location handed straight to ``pandas.read_html``.

    Returns:
        The list of DataFrames produced by ``pandas.read_html``.
    """
    return pd.read_html(url, header=0)
# Fetch every table on the Wikipedia page; the S&P 500 constituents
# list is the first one
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = load_data(url)
df = tables[0]
df.head()

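|   | Symbol | Security | GICS Sector | GICS Sub-Industry | Headquarters Location | Date added | CIK | Founded |
|---|--------|----------|-------------|-------------------|-----------------------|------------|-----|---------|
| 0 | MMM | 3M | Industrials | Industrial Conglomerates | Saint Paul, Minnesota | 1957-03-04 | 66740 | 1902 |
| 1 | AOS | A. O. Smith | Industrials | Building Products | Milwaukee, Wisconsin | 2017-07-26 | 91142 | 1916 |
| 2 | ABT | Abbott | Health Care | Health Care Equipment | North Chicago, Illinois | 1957-03-04 | 1800 | 1888 |
| 3 | ABBV | AbbVie | Health Care | Biotechnology | North Chicago, Illinois | 2012-12-31 | 1551152 | 2013 (1888) |
| 4 | ACN | Accenture | Information Technology | IT Consulting & Other Services | Dublin, Ireland | 2011-07-06 | 1467373 | 1989 |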

Examining the data

# List each distinct GICS sector represented among the S&P 500 companies
sector_unique = pd.unique(df['GICS Sector'])
sector_unique
array(['Industrials', 'Health Care', 'Information Technology',
       'Utilities', 'Financials', 'Materials', 'Consumer Discretionary',
       'Real Estate', 'Communication Services', 'Consumer Staples',
       'Energy'], dtype=object)
import seaborn as sns
import matplotlib.pyplot as plt

# Count the companies in each GICS sector, largest sector first, so the
# bars are drawn in descending order
sector = (
    df.groupby('GICS Sector')
    .agg(Count=('Symbol', 'size'))
    .reset_index()
    .sort_values('Count', ascending=False)
)

# The theme must be set before the color codes: the pastel codes decide
# what the shorthand color 'b' resolves to below
sns.set_theme(style="whitegrid")
sns.set_color_codes("pastel")

# Initialize the matplotlib figure
f, ax = plt.subplots(figsize=(15, 8))

# Plot the sector counts on the axes created above instead of relying on
# whichever axes pyplot currently treats as active
sns.barplot(x='GICS Sector', y='Count', data=sector,
            label='Companies per Sector', color='b', ax=ax)

# Add informative axis labels and a title
ax.set_xlabel('Sector', fontweight='bold', fontsize=18)
ax.set_ylabel('No. of Companies', fontweight='bold', fontsize=18)
# Rotate the existing tick labels in place so the long sector names do
# not overlap; this restyles them without re-assigning their text
plt.setp(ax.get_xticklabels(), rotation=40, ha="right")
ax.set_title('No. of Companies in S&P 500 per Sector', fontsize=24, fontweight='bold')
sns.despine(left=True, bottom=True)

No. of Companies in S&P 500 per Sector

Retrieve stock data using yfinance

import yfinance as yf
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# not only the ones raised during the download
warnings.filterwarnings('ignore')

data = yf.download(
            # tickers list; Wikipedia writes share classes with a dot
            # (BRK.B, BF.B) while Yahoo Finance expects a hyphen (BRK-B,
            # BF-B), so convert them or those downloads fail. The affected
            # tickers are then keyed by their hyphenated form in `data`.
            tickers = df['Symbol'].str.replace('.', '-', regex=False).tolist(),
            # valid periods: 1d, 5d, 1mo, 3mo, 6mo, 1y, 5y, 10y, ytd, max
            period = '1y',
            # valid intervals: 1m, 2m, 5m, 15m, 30m, 60m, 90m, 1h, 1d, 5d, 1wk, 1mo, 3mo
            interval = '1d',
            # group by ticker, so each ticker's columns are reached as data[ticker]
            group_by = 'ticker',
            # adjust all OHLC automatically
            auto_adjust = True,
            # download pre/post regular market hours data
            prepost = True,
            # use threads for mass downloading
            threads = True,
            # proxy URL scheme when downloading
            proxy = None
            )
[*********************100%%**********************]  503 of 503 completed

2 Failed downloads:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (period=1y)')
['BRK.B']: Exception('%ticker%: No data found, symbol may be delisted')
# Preview the first five rows of the AAPL price columns
data['AAPL'].head(n=5)

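| Price | Open | High | Low | Close | Volume |
|-------|------|------|-----|-------|--------|
| Date | | | | | |
| 2023-02-24 | 146.328931 | 146.408508 | 144.946312 | 145.931061 | 55469600 |
| 2023-02-27 | 146.925752 | 148.377992 | 146.667123 | 147.134628 | 44998500 |
| 2023-02-28 | 146.269261 | 148.288481 | 146.050427 | 146.627350 | 50547000 |
| 2023-03-01 | 146.050447 | 146.448318 | 144.240103 | 144.538513 | 55479000 |
| 2023-03-02 | 143.613439 | 145.931070 | 143.135977 | 145.135315 | 52238100 |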
# Chart AAPL's closing price: a shaded area with the price line on top
df_aapl = pd.DataFrame(data['AAPL']['Close'])

plt.fill_between(df_aapl.index, df_aapl['Close'], color='skyblue', alpha=0.3)
plt.plot(df_aapl.index, df_aapl['Close'], color='skyblue', alpha=0.8)
plt.xlabel('Date')
plt.xticks(rotation=90)
# Kept last so the cell still echoes the y-label Text object
plt.ylabel('Closing Price of AAPL')
Text(0, 0.5, 'Closing Price of AAPL')

Closing Price of AAPL

# Reusable helper that draws the closing-price chart for one ticker
def price_plot(symbol):
    """Show the closing-price history of ``symbol`` as a shaded line chart.

    Reads the prices from the module-level ``data`` frame, draws them with
    pyplot on the current figure, and displays the result.

    Args:
        symbol: Key used to look up the ticker's columns in ``data``.

    Returns:
        Whatever ``plt.show()`` returns.
    """
    close = data[symbol]['Close']
    dates = close.index

    # Shaded area first, then the price line on top of it
    plt.fill_between(dates, close, color='skyblue', alpha=0.3)
    plt.plot(dates, close, color='skyblue', alpha=0.8)

    plt.title(symbol, fontweight='bold')
    plt.xlabel('Date', fontweight='bold')
    plt.ylabel('Closing Price', fontweight='bold')
    plt.xticks(rotation=90)
    return plt.show()
# Example: chart the closing price for the GOOGL ticker
price_plot('GOOGL')

Closing Price of GOOGL


Author: wenvenn
Reprint policy: Unless otherwise stated, all articles in this blog are licensed under CC BY 4.0. If you reproduce this article, please credit the source: wenvenn.