BetterDocs
Home
Docs

Creation | pd.read_excel()

Method:

pd.read_excel(io, sheet_name=0, *, header=0, names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, parse_dates=False, date_parser=<no_default>, date_format=None, thousands=None, decimal='.', comment=None, skipfooter=0, storage_options=None, dtype_backend=<no_default>, engine_kwargs=None)

Reads an Excel file into a DataFrame.

Returns:

pandas.core.frame.DataFrame

Parameters:

io: path-

Path, URL, or file-like object for the Excel file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the default (first) sheet
df_default = pd.read_excel(io=file_path)
print(df_default)
'''
Output:
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
'''

sheet_name: (str or array-like), Optional-

Sheet name(s) to read; default is the first sheet (0).

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

sheet2 = wb.create_sheet(title="Sheet2")
sheet2.append(["ID", "City"])
sheet2.append([1, "New York"])
sheet2.append([2, "Los Angeles"])

sheet3 = wb.create_sheet(title="Sheet3")
sheet3.append(["ID", "Score"])
sheet3.append([1, 88.5])
sheet3.append([2, 92.0])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, sheet_name=["Sheet2", "Sheet3"])
for name, df in df.items():
  print(df)
'''
Output:
   ID         City
0   1     New York
1   2  Los Angeles
   ID  Score
0   1   88.5
1   2   92.0
'''

names: array-like, Optional-

Custom column names to use.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, names=["ID", "FName", "Kids"])
print(df)
'''
Output:
   ID  FName  Kids
0   1  Alice    25
1   2    Bob    30
'''

Overrides existing headers for columns.

index_col: None, Optional-

It specifies which column should be used as the index of the resulting DataFrame.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, index_col="ID")
print(df)
'''
Output:
     Name  Age
ID            
1   Alice   25
2     Bob   30
'''

usecols: None, Optional-

It allows you to select specific columns to read from the input file, rather than reading all columns.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, usecols=["Name", "ID"])
print(df)
'''
Output:
   ID   Name
0   1  Alice
1   2    Bob
'''

dtype: data-type, Optional-

Specifies the data-type of the DataFrame. If not provided, it’s inferred from the input.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, dtype={"ID": 'float32'})
print(df)
'''
Output:
    ID   Name  Age
0  1.0  Alice   25
1  2.0    Bob   30
'''

Values: +

engine: ('openpyxl' or 'xlrd' or 'pyxlsb' or 'odf'), Optional-

It specifies the underlying parsing engine to use when reading the file. Default c.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, engine='openpyxl')
print(df)
'''
Output:
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
'''

When reading a file, pandas uses one of these engines to parse the content. The openpyxl supports newer Excel file formats, the odf supports OpenDocument file formats (.odf, .ods, .odt), the pyxlsb supports Binary Excel files, the xlrd supports old-style Excel files (.xls).

converters: None, Optional-

It allows you to specify custom functions to convert or process values in certain columns while reading the file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])

# Save the workbook
wb.save(file_path)

def half_the_age(age):
    return int(age)/2

# Read the sheets
df = pd.read_excel(io=file_path, converters={"Age": half_the_age})
print(df)
'''
Output:
   ID   Name   Age
0   1  Alice  12.5
1   2    Bob  15.0
'''

true_values: None, Optional-

It is used to specify which string values should be interpreted as boolean True.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age", "isActive"])
sheet1.append([1, "Alice", 25, "yes"])
sheet1.append([2, "Bob", 30, "maybe"])
sheet1.append([2, "Charlie", 10, "no"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, true_values=["yes", "maybe"], false_values=["no"])
print(df)
'''
Output:
   ID     Name  Age  isActive
0   1    Alice   25      True
1   2      Bob   30      True
2   2  Charlie   10     False
'''

For true_values or false_values to work as intended, both of them must contain either of the values in a column.

false_values: None, Optional-

It is used to specify which string values should be interpreted as boolean False.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age", "isActive"])
sheet1.append([1, "Alice", 25, "yes"])
sheet1.append([2, "Bob", 30, "maybe"])
sheet1.append([2, "Charlie", 10, "no"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, true_values=["yes"], false_values=["no", "maybe"])
print(df)
'''
Output:
   ID     Name  Age  isActive
0   1    Alice   25      True
1   2      Bob   30     False
2   2  Charlie   10     False
'''

For true_values or false_values to work as intended, both of them must contain either of the values in a column.

skiprows: None, Optional-

It is used to skip a specified number of rows from the start of the file or from a list of specific row indices.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["random", "random", "random"])
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])
sheet1.append([2, "Charlie", 10])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, skiprows=1)
print(df)
'''
Output:
   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   2  Charlie   10
'''

nrows: None, Optional-

It is used to limit the number of rows that are read from the file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])
sheet1.append([2, "Charlie", 10])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, nrows=2)
print(df)
'''
Output:
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
'''

na_values: None, Optional-

It is used to specify additional values that should be treated as NaN (Not a Number) while reading the data.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", "na"])
sheet1.append([2, "missing", 30])
sheet1.append([2, "Charlie", 10])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, na_values=["missing", "na"])
print(df)
'''
Output:
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
'''

Values: +

keep_default_na: (True or False), Optional-

It controls whether or not the default missing values (i.e., NaN values) specified by pandas should be preserved when reading the file.

keep_default_na = True (default) +

keep_default_na = False +

Values: +

If keep_default_na is True, and na_values are specified, na_values is appended to the default NaN values used for parsing.

If keep_default_na is True, and na_values are not specified, only the default NaN values are used for parsing.

If keep_default_na is False, and na_values are specified, only the NaN values specified na_values are used for parsing.

If keep_default_na is False, and na_values are not specified, no strings will be parsed as NaN.

If na_filter  is passed in as <ri>False<ri>, the keep_default_na and na_values parameters will be ignored.

na_filter: (True or False), Optional-

It controls whether pandas should check for missing values (e.g., NaN, NA, null, etc.) during the reading process.

na_filter = True (default) +

na_filter = False +

Values: +

verbose: (True or False), Optional-

It controls whether or not detailed information about the parsing process is displayed.

verbose = False (default) +

verbose = True +

parse_dates: (True or False), Optional-

It is used to specify which columns should be parsed as dates during the reading of the file.

parse_dates = False (default) +

parse_dates = True +

date_parser: (True or False), Optional-

Deprecated since version 2.0.0: A strict version of this argument is now the default, passing it has no effect.

date_format: None, Optional-

It allows you specify the input format of the date when reading.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "BirthDate"])
sheet1.append([1, "Alice", "2003-10-02"])
sheet1.append([2, "Bob", "2003-04-16"])
sheet1.append([2, "Charlie", "2023-12-29"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, parse_dates=['BirthDate'], date_format="%Y-%m-%d")
print(df['BirthDate'])
'''
Output:
0   2003-10-02
1   2003-04-16
2   2023-12-29
Name: BirthDate, dtype: datetime64[ns]
'''

thousands: single char, Optional-

Character acting as the thousands separator in numerical values.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2.000.000"])
sheet1.append([2, "Bob", "10.000.000"])
sheet1.append([2, "Charlie", "200.000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, thousands='.')
print(df)
'''
Output:
   ID     Name    Salary
0   1    Alice   2000000
1   2      Bob  10000000
2   2  Charlie    200000
'''

decimal: single char, Optional-

Character to recognize as decimal point (e.g., use ‘,’ for European data).

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "20000|00"])
sheet1.append([2, "Bob", "1000000|01"])
sheet1.append([2, "Charlie", "2000|5"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, decimal='|')
print(df)
'''
Output:
   ID     Name      Salary
0   1    Alice    20000.00
1   2      Bob  1000000.01
2   2  Charlie     2000.50
'''

comment: single char, Optional-

It is used to specify a character that indicates the beginning of a comment in the data file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000 #commentttt hehe"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, comment='#')
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
2   2  Charlie    2000
'''

skipfooter: None, Optional-

It allows you to skip a specified number of rows from the end of the file when reading the data.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 25])
sheet1.append([2, "Bob", 30])
sheet1.append([2, "Charlie", 10])
sheet1.append(["random", "random", "random"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.read_excel(io=file_path, skipfooter=1)
print(df)
'''
Output:
   ID     Name  Age
0   1    Alice   25
1   2      Bob   30
2   2  Charlie   10
'''

storage_options: dict, Optional-

Dictionary of storage-specific options, such as credentials for cloud storage.

dtype_backend: None, Optional-

The dtype_backend parameter is new in Pandas 2.0 which is used to specify the backend for handling the types of data when reading a file.

engine_kwargs: str, Optional-

The engine_kwargs parameter is new in Pandas 2.0 which allows you to pass additional arguments specific to the underlying Excel file engine being used.


Logo

BetterDocs

Support

EmailDiscordForms

Documentations

Python

Company

AboutDocs

Policies

Terms of ServicePrivacy Policy