BetterDocs

Home

Docs

Creation | pd.ExcelFile.parse()

Previous Next

Method:

pd.ExcelFile.parse(sheet_name=0, header=0, names=None, index_col=None, usecols=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, parse_dates=False, date_parser=<no_default>, date_format=None, thousands=None, comment=None, skipfooter=0, dtype_backend=<no_default>, **kwds)

Parses a specific sheet from an Excel file into a DataFrame.

Returns:

pandas.core.frame.DataFrame

Parameters:

sheet_name: (str or int)-

Sheet name or index to parse; default is 0 (first sheet).

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1')
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
2   2  Charlie    2000
'''

header: 0, Optional-

Row number(s) to use as the column names.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', header=0)
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
2   2  Charlie    2000
'''

names: array-like, Optional-

Custom column names to use.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', names=["EmpID", "FName", "Kids"])
print(df)
'''
Output:
   EmpID    FName   Kids
0      1    Alice   2000
1      2      Bob  10000
2      2  Charlie   2000
'''

Overrides existing headers for columns.

index_col: None, Optional-

It specifies which column should be used as the index of the resulting DataFrame.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', index_col='ID')
print(df)
'''
Output:
       Name  Salary
ID                 
1     Alice    2000
2       Bob   10000
2   Charlie    2000
'''

usecols: None, Optional-

It allows you to select specific columns to read from the input file, rather than reading all columns.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', usecols=["ID", "Name"])
print(df)
'''
Output:
   ID     Name
0   1    Alice
1   2      Bob
2   2  Charlie
'''

converters: None, Optional-

It allows you to specify custom functions to convert or process values in certain columns while reading the file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Age"])
sheet1.append([1, "Alice", 20])
sheet1.append([2, "Bob", 30])
sheet1.append([2, "Charlie", 10])

# Save the workbook
wb.save(file_path)

def half_the_age(age):
    return int(age)/2

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', converters={"Age": half_the_age})
print(df)
'''
Output:
   ID     Name   Age
0   1    Alice  10.0
1   2      Bob  15.0
2   2  Charlie   5.0
'''

true_values: None, Optional-

It is used to specify which string values should be interpreted as boolean True.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary", "isActive"])
sheet1.append([1, "Alice", "2000", "yes"])
sheet1.append([2, "Bob", "10000", "maybe"])
sheet1.append([2, "Charlie", "2000", "no"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', true_values=["yes", "maybe"], false_values=["no"])
print(df)
'''
Output:
   ID     Name  Salary  isActive
0   1    Alice    2000      True
1   2      Bob   10000      True
2   2  Charlie    2000     False
'''

For true_values or false_values to work as intended, both of them must contain either of the values in a column.

false_values: None, Optional-

It is used to specify which string values should be interpreted as boolean False.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary", "isActive"])
sheet1.append([1, "Alice", "2000", "yes"])
sheet1.append([2, "Bob", "10000", "maybe"])
sheet1.append([2, "Charlie", "2000", "no"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', true_values=["yes"], false_values=["no", "maybe"])
print(df)
'''
Output:
   ID     Name  Salary  isActive
0   1    Alice    2000      True
1   2      Bob   10000     False
2   2  Charlie    2000     False
'''

For true_values or false_values to work as intended, both of them must contain either of the values in a column.

skiprows: None, Optional-

It is used to skip a specified number of rows from the start of the file or from a list of specific row indices.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["random", "random", "random"])
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', skiprows=1)
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
2   2  Charlie    2000
'''

nrows: None, Optional-

It is used to limit the number of rows that are read from the file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', nrows=2)
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
'''

na_values: None, Optional-

It is used to specify additional values that should be treated as NaN (Not a Number) while reading the data.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "na"])
sheet1.append([2, "missing", "10000"])
sheet1.append([2, "Charlie", "2000"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', na_values=["missing", "na"])
print(df)
'''
Output:
   ID     Name   Salary
0   1    Alice      NaN
1   2      NaN  10000.0
2   2  Charlie   2000.0
'''

Values: +

parse_dates: (True or False), Optional-

It is used to specify which columns should be parsed as dates during the reading of the file.

parse_dates = False (default) +

Pandas will not attempt to parse any columns as dates. If any date strings are present, they will be read as plain text.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "BirthDate"])
sheet1.append([1, "Alice", "2003-10-02"])
sheet1.append([2, "Bob", "2003-04-16"])
sheet1.append([2, "Charlie", "2023-12-29"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', parse_dates=False)
print(df['BirthDate'])
'''
Output:
0    2003-10-02
1    2003-04-16
2    2023-12-29
Name: BirthDate, dtype: object
'''

parse_dates = True +

Pandas will attempt to parse all columns with date-like values (e.g., strings in the format "YYYY-MM-DD") into datetime objects.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "BirthDate"])
sheet1.append([1, "Alice", "2003-10-02"])
sheet1.append([2, "Bob", "2003-04-16"])
sheet1.append([2, "Charlie", "2023-12-29"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', parse_dates=['BirthDate'])
print(df['BirthDate'])
'''
Output:
0   2003-10-02
1   2003-04-16
2   2023-12-29
Name: BirthDate, dtype: datetime64[ns]
'''

date_parser: (True or False), Optional-

Deprecated since version 2.0.0: A strict version of this argument is now the default, passing it has no effect.

date_format: None, Optional-

It allows you specify the input format of the date when reading.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "BirthDate"])
sheet1.append([1, "Alice", "2003-10-02"])
sheet1.append([2, "Bob", "2003-04-16"])
sheet1.append([2, "Charlie", "2023-12-29"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', parse_dates=['BirthDate'], date_format='%Y-%m-%d')
print(df['BirthDate'])
'''
Output:
0   2003-10-02
1   2003-04-16
2   2023-12-29
Name: BirthDate, dtype: datetime64[ns]
'''

thousands: single char, Optional-

Character acting as the thousands separator in numerical values.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2|000"])
sheet1.append([2, "Bob", "10|000"])
sheet1.append([2, "Charlie", "200"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', thousands='|')
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10000
2   2  Charlie     200
'''

comment: single char, Optional-

It is used to specify a character that indicates the beginning of a comment in the data file.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000 #commentttt hehe"])
sheet1.append([2, "Bob", "10050"])
sheet1.append([2, "Charlie", "232564"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', comment='#')
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10050
2   2  Charlie  232564
'''

skipfooter: None, Optional-

It allows you to skip a specified number of rows from the end of the file when reading the data.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "2000"])
sheet1.append([2, "Bob", "10050"])
sheet1.append([2, "Charlie", "232564"])
sheet1.append(["random", "random", "random"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', skipfooter=1)
print(df)
'''
Output:
   ID     Name  Salary
0   1    Alice    2000
1   2      Bob   10050
2   2  Charlie  232564
'''

dtype_backend: None, Optional-

The dtype_backend parameter is new in Pandas 2.0 which is used to specify the backend for handling the types of data when reading a file.

**kwds Optional-

This parameter allows you to pass additional keyword arguments that are forwarded to the underlying pd.read_excel method.

import pandas as pd
from openpyxl import Workbook

# Create an Excel file with multiple sheets
file_path = "example.xlsx"

# Create a workbook and add data
wb = Workbook()
sheet1 = wb.active
sheet1.title = "Sheet1"
sheet1.append(["ID", "Name", "Salary"])
sheet1.append([1, "Alice", "20.00"])
sheet1.append([2, "Bob", "100.50"])
sheet1.append([2, "Charlie", "2325.64"])

# Save the workbook
wb.save(file_path)

# Read the sheets
df = pd.ExcelFile(path_or_buffer=file_path).parse(sheet_name='Sheet1', decimal='.')
print(df)
'''
Output:
   ID     Name   Salary
0   1    Alice    20.00
1   2      Bob   100.50
2   2  Charlie  2325.64
'''

Previous Next

BetterDocs

Support

EmailDiscordForms

Documentations

Python

Company

AboutDocs

Policies

Terms of ServicePrivacy Policy

Creation | pd.ExcelFile.parse()

Method:

Parses a specific sheet from an Excel file into a DataFrame.

Returns:

pandas.core.frame.DataFrame

Parameters:

sheet_name: (str or int)-

header: 0, Optional-

names: array-like, Optional-

index_col: None, Optional-

usecols: None, Optional-

converters: None, Optional-

true_values: None, Optional-

false_values: None, Optional-

skiprows: None, Optional-

nrows: None, Optional-

na_values: None, Optional-

Values: +

NaN Values

parse_dates: (True or False), Optional-

parse_dates = False (default) +

parse_dates = True +

date_parser: (True or False), Optional-

date_format: None, Optional-

thousands: single char, Optional-

comment: single char, Optional-

skipfooter: None, Optional-

dtype_backend: None, Optional-

**kwds Optional-

BetterDocs

Support

Documentations

Company

Policies