Path to the file or file-like object to read.
import pandas as pd
import json

# Sample data: column name -> list of column values.
records = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"]
}

# Serialize the sample data to a JSON file on disk.
out_path = "sample_data.json"
with open(out_path, "w") as fh:
    fh.write(json.dumps(records, indent=2))

# Load the JSON file straight into a DataFrame.
df = pd.read_json(path_or_buf=out_path)
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
Expected JSON data format.
JSON should be a dictionary where keys are column names and values are lists of column values.
import pandas as pd
import json

# Column-oriented sample: each key names a column, each value is its data.
columns_payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"]
}

# Persist the payload as JSON.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(columns_payload, handle, indent=2)

# orient="columns" (the default for dict-shaped JSON) maps keys to columns.
df = pd.read_json(path_or_buf=target, orient="columns")
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
JSON should have a dictionary with the keys: index, columns, and data.
import pandas as pd
import json

# "split" layout: explicit index labels, column names, and row data.
split_payload = {
    "index": [0, 1, 2],
    "columns": ["Name", "Age", "City"],
    "data": [["Alice", 25, "New York"], ["Bob", 30, "Los Angeles"], ["Chloe", 22, "Chicago"]]
}

# Write the payload to disk.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(split_payload, indent=2))

# orient="split" reassembles index/columns/data into a DataFrame.
df = pd.read_json(path_or_buf=target, orient="split")
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
JSON should be a list of dictionaries, where each dictionary represents a row.
import pandas as pd
import json

# "records" layout: a list where every element is one row as a dict.
rows = [
    {"Name": "Alice", "Age": 25, "City": "New York"},
    {"Name": "Bob", "Age": 30, "City": "Los Angeles"},
    {"Name": "Chloe", "Age": 22, "City": "Chicago"}
]

# Dump the row list to a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(rows, handle, indent=2)

# orient="records" turns each dict into one DataFrame row.
df = pd.read_json(path_or_buf=target, orient="records")
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
JSON should be a dictionary of dictionaries, with keys as row indices and values as dictionaries representing columns.
import pandas as pd
import json

# "index" layout: outer keys are row labels, inner dicts are the columns.
rows_by_index = {
    "0": {"Name": "Alice", "Age": 25, "City": "New York"},
    "1": {"Name": "Bob", "Age": 30, "City": "Los Angeles"},
    "2": {"Name": "Chloe", "Age": 22, "City": "Chicago"}
}

# Save the payload as JSON.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(rows_by_index, indent=2))

# orient="index" treats the outer keys as the DataFrame index.
df = pd.read_json(path_or_buf=target, orient="index")
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
JSON should be a list of lists, where each inner list represents a row of data.
import pandas as pd
import json

# "values" layout: a bare list of rows with no labels at all.
raw_rows = [
    ["Alice", 25, "New York"],
    ["Bob", 30, "Los Angeles"],
    ["Chloe", 22, "Chicago"]
]

# Write the row data to disk.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(raw_rows, handle, indent=2)

# orient="values" produces default integer row and column labels.
df = pd.read_json(path_or_buf=target, orient="values")
print(df)
'''
Output:
0 1 2
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
JSON should have a dictionary with the keys: schema, and data.
import pandas as pd
import json

# "table" layout follows the Table Schema spec: a "schema" block
# describing fields and primary key, plus the actual "data" records.
table_payload = {
    "schema": {
        "fields": [
            {"name": "ID", "type": "integer"},
            {"name": "Name", "type": "string"},
            {"name": "Age", "type": "integer"}
        ],
        "primaryKey": ["ID"],
        "pandas_version": "2.2"
    },
    "data": [
        {"ID": 1, "Name": "Alice", "Age": 25},
        {"ID": 2, "Name": "Bob", "Age": 30},
        {"ID": 3, "Name": "Chloe", "Age": 22}
    ]
}

# Persist the schema + data document.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(table_payload, indent=2))

# orient="table" honours the schema; the primary key becomes the index.
df = pd.read_json(path_or_buf=target, orient="table")
print(df)
'''
Output:
Name Age
ID
1 Alice 25
2 Bob 30
3 Chloe 22
'''
The type of object to recover.
Returns a DataFrame object.
import pandas as pd
import json

# Column-oriented sample data.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(payload, handle, indent=2)

# typ='frame' (the default) asks for a DataFrame back.
df = pd.read_json(path_or_buf=target, typ='frame')
print(df)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
Returns a Series object.
import pandas as pd
import json

# Column-oriented sample data.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(payload, indent=2))

# typ='series' returns a Series: keys become the index, values the data.
s = pd.read_json(path_or_buf=target, typ='series')
print(s)
'''
Output:
Name [Alice, Bob, Chloe]
Age [25, 30, 22]
City [New York, Los Angeles, Chicago]
dtype: object
'''
Datatype for the object that is to be returned.
import pandas as pd
import json

# Column-oriented sample data.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(payload, handle, indent=2)

# dtype lets us force a specific dtype per column; Age becomes float32.
df = pd.read_json(path_or_buf=target, dtype={'Age': 'float32'})
print(df)
'''
Output:
Name Age City
0 Alice 25.0 New York
1 Bob 30.0 Los Angeles
2 Chloe 22.0 Chicago
'''
Convert axis data types.
Axes are automatically converted to numeric types where possible.
import pandas as pd
import json

# Column labels are JSON strings ("0", "1", "2") on purpose.
payload = {
    "0": ["Alice", "Bob", "Chloe"],
    "1": ["25", "30", "22"],
    "2": ["New York", "Los Angeles", "Chicago"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(payload, indent=2))

# convert_axes=True coerces numeric-looking axis labels to numbers.
df = pd.read_json(path_or_buf=target, convert_axes=True)
print(df)
'''
Output:
0 1 2
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
Axes remain as their original types (often strings) regardless of whether they can be converted to numeric.
import pandas as pd
import json

# Column labels are JSON strings ("0", "1", "2") on purpose.
payload = {
    "0": ["Alice", "Bob", "Chloe"],
    "1": ["25", "30", "22"],
    "2": ["New York", "Los Angeles", "Chicago"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(payload, handle, indent=2)

# convert_axes=False leaves the string labels untouched.
df = pd.read_json(path_or_buf=target, convert_axes=False)
print(df)
'''
Output:
0 1 2
0 Alice 25 New York
1 Bob 30 Los Angeles
2 Chloe 22 Chicago
'''
Parse dates from strings.
import pandas as pd
import json

# One column holds ISO-formatted date strings.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "BirthDates": ["2003-10-02", "2003-04-16", "2023-12-29"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(payload, indent=2))

# Listing a column in convert_dates forces it to be parsed as datetimes.
df = pd.read_json(path_or_buf=target, convert_dates=['BirthDates'])
print(df['BirthDates'])
'''
Output:
0 2003-10-02
1 2003-04-16
2 2023-12-29
Name: BirthDates, dtype: datetime64[ns]
'''
Keep default datetime parsing.
The default date-like columns should be converted into datetime64[ns].
import pandas as pd
import json

# "timestamp" matches pandas' default date-like column-name heuristics.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "timestamp": ["2003-10-02", "2003-04-16", "2023-12-29"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(payload, handle, indent=2)

# keep_default_dates=True (the default) parses such columns as datetimes.
df = pd.read_json(path_or_buf=target, keep_default_dates=True)
print(df['timestamp'])
'''
Output:
0 2003-10-02
1 2003-04-16
2 2023-12-29
Name: timestamp, dtype: datetime64[ns]
'''
The default date-like columns remain as object (string) dtype.
import pandas as pd
import json

# "timestamp" would normally trigger pandas' default date parsing.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "timestamp": ["2003-10-02", "2003-04-16", "2023-12-29"]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    handle.write(json.dumps(payload, indent=2))

# keep_default_dates=False disables that heuristic; strings stay strings.
df = pd.read_json(path_or_buf=target, keep_default_dates=False)
print(df['timestamp'])
'''
Output:
0 2003-10-02
1 2003-04-16
2 2023-12-29
Name: timestamp, dtype: object
'''
When set to True, a column is converted to dates only if its label ends with "_at" or "_time", begins with "timestamp", or is exactly "date" or "modified".
Use precise floating-point conversion (False by default).
Unit of time for datetime parsing (e.g., 's', 'ms').
import pandas as pd
import json

# Timestamps stored as Unix epoch values, in seconds.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "Timestamp": [1065117600, 1050480000, 1703876400]
}

# Save the sample as a JSON file.
target = "sample_data.json"
with open(target, "w") as handle:
    json.dump(payload, handle, indent=2)

# date_unit='s' tells pandas the epoch values are seconds, not ms/us/ns.
df = pd.read_json(path_or_buf=target, date_unit='s')
print(df)
'''
Output:
Name Age Timestamp
0 Alice 25 2003-10-02 18:00:00
1 Bob 30 2003-04-16 08:00:00
2 Chloe 22 2023-12-29 19:00:00
'''
It specifies the character encoding to use when reading a JSON file.
import pandas as pd
import json

# Simple two-column sample.
payload = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
}

# Write the file explicitly as UTF-8.
target = "sample_data.json"
with open(target, "w", encoding="utf-8") as handle:
    handle.write(json.dumps(payload, indent=2))

# Read it back using the matching encoding.
df = pd.read_json(path_or_buf=target, encoding="utf-8")
print(df)
'''
Output:
Name Age
0 Alice 25
1 Bob 30
2 Chloe 22
'''
It allows you to control how encoding errors are handled during the reading process.
import pandas as pd
import json

# Sample data containing non-ASCII characters (é, ü, ë) so that decoding
# the file as ASCII is guaranteed to hit undecodable bytes.
sample_json = {
    "Name": ["Alice", "Bob", "Chloé"],
    "City": ["München", "Berlin", "Hamburg"]
}
# Overwrite the first city with another accented spelling to add more
# multi-byte UTF-8 characters for the demonstration.
sample_json["City"][0] = "Münchën"

# Write the data as UTF-8 (ensure_ascii=False keeps the raw accented chars).
json_file_path = "sample_data_with_invalid_encoding.json"
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(sample_json, json_file, ensure_ascii=False, indent=2)

# encoding_errors='strict' (the default): undecodable bytes raise.
try:
    df_strict = pd.read_json(json_file_path, encoding='ascii', encoding_errors='strict')
    print("With encoding_errors='strict':")
    print(df_strict)
except UnicodeDecodeError as e:
    print("Error with encoding_errors='strict':", e)

# encoding_errors='ignore': undecodable bytes are silently dropped.
df_ignore = pd.read_json(json_file_path, encoding='ascii', encoding_errors='ignore')
print("\nWith encoding_errors='ignore':")
print(df_ignore)

# encoding_errors='replace': undecodable bytes become U+FFFD (�).
df_replace = pd.read_json(json_file_path, encoding='ascii', encoding_errors='replace')
print("\nWith encoding_errors='replace':")
print(df_replace)
'''
Output:
Error with encoding_errors='strict': 'ascii' codec can't decode byte 0xc3 in position 47: ordinal not in range(128)
With encoding_errors='ignore':
Name City
0 Alice Mnchn
1 Bob Berlin
2 Chlo Hamburg
With encoding_errors='replace':
Name City
0 Alice M��nch��n
1 Bob Berlin
2 Chlo�� Hamburg
'''
It is used to specify whether the JSON data is structured as a series of newline-delimited JSON objects (also known as JSON Lines or NDJSON).
import pandas as pd
import json

# Rows to be written in JSON Lines (NDJSON) format: one object per line.
rows = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Emit one JSON document per line.
json_file_path = "data.json"
with open(json_file_path, "w") as handle:
    for row in rows:
        handle.write(json.dumps(row) + "\n")

# lines=True tells read_json to expect newline-delimited JSON.
df = pd.read_json(json_file_path, lines=True)
print(df)
When lines=False (the default), the input must be a single regular JSON document rather than newline-delimited JSON.
Number of rows per chunk when using an iterator.
import pandas as pd
import json

# Sample data for JSON Lines format (one JSON object per line).
data = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Write the data file, one JSON object per line.
json_file_path = "data.json"
with open(json_file_path, "w") as json_file:
    for record in data:
        json.dump(record, json_file)
        json_file.write("\n")

# With chunksize set, read_json returns a JsonReader iterator instead of a
# DataFrame, yielding `chunksize` rows at a time. The reader keeps the file
# open, so use it as a context manager to guarantee the handle is closed
# even if iteration stops early (the original code leaked it).
with pd.read_json(json_file_path, lines=True, chunksize=2) as reader:
    chunk = next(reader)  # first chunk: the first 2 rows
    print(chunk)
'''
Output:
Name Age
0 Alice 25
1 Bob 30
'''
Works only when lines=True.
It specifies the type of compression used for reading a file.
import pandas as pd
import json

# Build a small DataFrame to round-trip through a compressed file.
records = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Berlin", "Hamburg"]
}
df = pd.DataFrame(records)

# Serialize to gzip-compressed JSON Lines.
json_file_path = "data.json.gz"
df.to_json(json_file_path, orient='records', lines=True, compression='gzip')

# compression='gzip' decompresses transparently while reading.
df_read = pd.read_json(json_file_path, compression='gzip', lines=True)
print(df_read)
'''
Output:
Name Age City
0 Alice 25 New York
1 Bob 30 Berlin
2 Chloe 22 Hamburg
'''
.to_json() is used to write a DataFrame to a JSON file.
It is used to limit the number of rows that are read from the JSON.
import pandas as pd
import json

# Rows for a JSON Lines file: one object per line.
rows = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Emit one JSON document per line.
json_file_path = "data.json"
with open(json_file_path, "w") as handle:
    for row in rows:
        handle.write(json.dumps(row) + "\n")

# nrows caps how many lines are read (requires lines=True).
df = pd.read_json(json_file_path, lines=True, nrows=2)
print(df)
'''
Output:
Name Age
0 Alice 25
1 Bob 30
'''
Works only when lines=True.
Dictionary of storage-specific options, such as credentials for cloud storage.
The dtype_backend parameter is new in Pandas 2.0 which is used to specify the backend for handling the types of data when reading a file.
It specifies the underlying parsing engine to use when reading the file. Default 'ujson'.
import pandas as pd
import json

# Rows for a JSON Lines file: one object per line.
rows = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Emit one JSON document per line.
json_file_path = "data.json"
with open(json_file_path, "w") as handle:
    for row in rows:
        handle.write(json.dumps(row) + "\n")

# engine='ujson' is the default C-based parser (the alternative is 'pyarrow').
df = pd.read_json(json_file_path, lines=True, engine='ujson')
print(df)
'''
Output:
Name Age
0 Alice 25
1 Bob 30
2 Chloe 22
'''
When reading a file, pandas uses one of these engines to parse the content. The ujson engine is lightweight and easy to use, while the pyarrow engine is heavier and supports more advanced data operations beyond parsing and serializing JSON.