Any valid string path is acceptable. Only the local file system is supported; remote URLs and file-like objects are not supported.
import pandas as pd
import requests

# Fetch the remote HDF5 file and save a local copy first,
# since read_hdf only accepts local file-system paths.
url = "https://betterdocs.tech/global/python/pandas/data_large.h5"
with open("data.h5", "wb") as out:
    out.write(requests.get(url).content)

# Load the DataFrame from the local copy and preview the first rows
df = pd.read_hdf(path_or_buf="data.h5")
print(df.head())
'''
Output:
id name birthdate age salary
0 1 Eric Arias 1979-05-22 45 102905
1 2 Billy Silva 1998-03-13 26 87949
2 3 David Goodwin 1974-09-07 50 34936
3 4 Wendy Jones DVM 1968-07-23 56 67408
4 5 Becky Butler 1995-11-30 29 134267
'''
It is used to specify the group name in the HDF5 file from which the data should be read.
import pandas as pd

# Build two small DataFrames to store side by side in one file
frame_a = pd.DataFrame({'id': [1, 2], 'name': ['Alice', 'Bob']})
frame_b = pd.DataFrame({'id': [3, 4], 'name': ['Chloe', 'David']})

# Store each frame under its own key: mode 'w' creates the file,
# mode 'a' appends the second group without clobbering the first
frame_a.to_hdf('data.h5', key='group1', mode='w')
frame_b.to_hdf('data.h5', key='group2', mode='a')

# Read back and display each group by its key
for group in ('group1', 'group2'):
    print(pd.read_hdf('data.h5', key=group))
'''
Output:
id name
0 1 Alice
1 2 Bob
id name
0 3 Chloe
1 4 David
'''
Mode to use when opening the file.
Specifies how encoding and decoding errors are to be handled.
Query for filtering rows based on conditions.
import pandas as pd
# Create sample DataFrame
data = {
'id': [1, 2, 3, 4, 5],
'name': ['Alice', 'Bob', 'Chloe', 'David', 'Eva'],
'salary': [75000, 80000, 85000, 90000, 95000],
'department': ['HR', 'Engineering', 'Marketing', 'Engineering', 'HR']
}
df = pd.DataFrame(data)
# format='table' with data_columns=True is what makes 'where' queries possible
df.to_hdf('employees_table.h5', key='data', mode='w', data_columns=True, format='table')
# Use the 'where' parameter to select only the row(s) where id == 2
df_filtered = pd.read_hdf('employees_table.h5', key='data', where="id=2")
# Print the filtered DataFrame
print(df_filtered)
'''
Output:
id name salary department
1 2 Bob 80000 Engineering
'''
Row index to start reading from.
import pandas as pd
import requests

# Download the sample HDF5 file so it can be read from the local disk
url = "https://betterdocs.tech/global/python/pandas/data_large.h5"
payload = requests.get(url).content
with open("data.h5", "wb") as fh:
    fh.write(payload)

# start=2 skips the first two rows: reading begins at row index 2
df = pd.read_hdf(path_or_buf="data.h5", start=2)
print(df.head())
'''
Output:
id name birthdate age salary
2 3 David Goodwin 1974-09-07 50 34936
3 4 Wendy Jones DVM 1968-07-23 56 67408
4 5 Becky Butler 1995-11-30 29 134267
5 6 Heather Frank 1985-12-13 39 48576
6 7 John Hicks 1978-12-03 46 49362
'''
Row index to stop reading at.
import pandas as pd
import requests

# Grab the remote HDF5 sample and persist a local copy first
url = "https://betterdocs.tech/global/python/pandas/data_large.h5"
with open("data.h5", "wb") as dest:
    dest.write(requests.get(url).content)

# stop=3 halts reading at row index 3, so only rows 0-2 are returned
df = pd.read_hdf(path_or_buf="data.h5", stop=3)
print(df)
'''
Output:
id name birthdate age salary
0 1 Eric Arias 1979-05-22 45 102905
1 2 Billy Silva 1998-03-13 26 87949
2 3 David Goodwin 1974-09-07 50 34936
'''
Subset of columns to load.
import pandas as pd

# Sample employee records
records = {
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Chloe', 'David', 'Eva'],
    'salary': [75000, 80000, 85000, 90000, 95000],
    'department': ['HR', 'Engineering', 'Marketing', 'Engineering', 'HR']
}

# Write to HDF5 in queryable 'table' format
df = pd.DataFrame(records)
df.to_hdf('employees_table.h5', key='data', mode='w', data_columns=True, format='table')

# Load only the 'id' and 'name' columns from the stored dataset
df_filtered = pd.read_hdf('employees_table.h5', columns=["id", "name"])
# Print the filtered DataFrame
print(df_filtered)
'''
Output:
id name
0 1 Alice
1 2 Bob
2 3 Chloe
3 4 David
4 5 Eva
'''
Return an iterator object.
Returns an entire DataFrame at once.
import pandas as pd

# Assemble a small employee table
employees = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Chloe', 'David', 'Eva'],
    'salary': [75000, 80000, 85000, 90000, 95000],
    'department': ['HR', 'Engineering', 'Marketing', 'Engineering', 'HR']
})

# Persist it in table format
employees.to_hdf('bb.h5', key='data', mode='w', data_columns=True, format='table')

# iterator=False (the default) returns the whole DataFrame in one go
result = pd.read_hdf('bb.h5', iterator=False)
print(result)
'''
Output:
id name salary department
0 1 Alice 75000 HR
1 2 Bob 80000 Engineering
2 3 Chloe 85000 Marketing
3 4 David 90000 Engineering
4 5 Eva 95000 HR
'''
It indicates that the method will return an iterator object instead of a DataFrame.
import pandas as pd

# Sample data to store
staff = {
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Chloe', 'David', 'Eva'],
    'salary': [75000, 80000, 85000, 90000, 95000],
    'department': ['HR', 'Engineering', 'Marketing', 'Engineering', 'HR']
}

# Save in table format, which supports chunked reads
pd.DataFrame(staff).to_hdf('bb.h5', key='data', mode='w', data_columns=True, format='table')

# iterator=True returns an iterator; chunksize=3 sets rows per chunk
reader = pd.read_hdf('bb.h5', iterator=True, chunksize=3)
for piece in reader:
    print(piece)
'''
Output:
id name salary department
0 1 Alice 75000 HR
1 2 Bob 80000 Engineering
2 3 Chloe 85000 Marketing
id name salary department
3 4 David 90000 Engineering
4 5 Eva 95000 HR
'''
When `iterator=True`, `chunksize` can be specified to control how many rows each chunk contains.
Number of rows to read per chunk.
import pandas as pd

# Build a small sample DataFrame
source = pd.DataFrame({
    'id': [1, 2, 3, 4, 5],
    'name': ['Alice', 'Bob', 'Chloe', 'David', 'Eva'],
    'salary': [75000, 80000, 85000, 90000, 95000],
    'department': ['HR', 'Engineering', 'Marketing', 'Engineering', 'HR']
})

# Store it in table format so it can be streamed back in pieces
source.to_hdf('bb.h5', key='data', mode='w', data_columns=True, format='table')

# chunksize=4 means each iteration yields up to 4 rows
chunks = pd.read_hdf('bb.h5', iterator=True, chunksize=4)
for c in chunks:
    print(c)
'''
Output:
id name salary department
0 1 Alice 75000 HR
1 2 Bob 80000 Engineering
2 3 Chloe 85000 Marketing
3 4 David 90000 Engineering
id name salary department
4 5 Eva 95000 HR
'''
Additional arguments for lower-level HDF5 functionality.