BetterDocs
Home
Docs

Creation | pd.read_json()

Method:

pd.read_json(path_or_buf, *, orient=None, typ='frame', dtype=None, convert_axes=None, convert_dates=True, keep_default_dates=True, precise_float=False, date_unit=None, encoding=None, encoding_errors='strict', lines=False, chunksize=None, compression='infer', nrows=None, storage_options=None, dtype_backend=<no_default>, engine='ujson')

Reads a JSON file or string into a DataFrame or Series.

Returns:

pandas.core.frame.DataFrame or pandas.core.series.Series

Parameters:

path_or_buf: (file_path or buffer), Required-

Path to the file or file-like object to read.

import pandas as pd
import json

# Build the sample records as a column-oriented mapping
sample_json = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"],
}

# Serialize the mapping and save it to disk
json_file_path = "sample_data.json"
serialized = json.dumps(sample_json, indent=2)
with open(json_file_path, mode="w") as out_file:
    out_file.write(serialized)

# Load the saved file back, passing its path through path_or_buf
df = pd.read_json(path_or_buf=json_file_path)
print(df)
'''
Output:
    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles
2  Chloe   22      Chicago
'''

orient: ('split' or 'records' or 'index' or 'columns' or 'values' or 'table'), Optional-

Expected JSON data format.

orient = 'columns' (default) +

orient = 'split' +

orient = 'records' +

orient = 'index' +

orient = 'values' +

orient = 'table' +

typ: ('frame' or 'series'), Optional-

The type of object to recover.

typ = 'frame' (default) +

typ = 'series' +

dtype: (bool or dict), Optional-

Datatype for the object that is to be returned.

import pandas as pd
import json

# Build the sample records as a column-oriented mapping
sample_json = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Los Angeles", "Chicago"],
}

# Serialize the mapping and save it to disk
json_file_path = "sample_data.json"
serialized = json.dumps(sample_json, indent=2)
with open(json_file_path, mode="w") as out_file:
    out_file.write(serialized)

# Load the file, forcing the Age column to a 32-bit float
column_types = {'Age': 'float32'}
df = pd.read_json(json_file_path, dtype=column_types)
print(df)
'''
Output:
    Name   Age         City
0  Alice  25.0     New York
1    Bob  30.0  Los Angeles
2  Chloe  22.0      Chicago
'''

convert_axes: (True or False), Optional-

Convert axis data types.

convert_axes = True (default) +

convert_axes = False +

convert_dates: (bool or list of str), Optional-

Parse dates from strings.

import pandas as pd
import json

# Build sample records whose BirthDates column holds ISO date strings
sample_json = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "BirthDates": ["2003-10-02", "2003-04-16", "2023-12-29"],
}

# Serialize the mapping and save it to disk
json_file_path = "sample_data.json"
serialized = json.dumps(sample_json, indent=2)
with open(json_file_path, mode="w") as out_file:
    out_file.write(serialized)

# Load the file, asking pandas to parse the listed column as dates
date_columns = ['BirthDates']
df = pd.read_json(json_file_path, convert_dates=date_columns)
print(df['BirthDates'])
'''
Output:
0   2003-10-02
1   2003-04-16
2   2023-12-29
Name: BirthDates, dtype: datetime64[ns]
'''

keep_default_dates: (True or False), Optional-

Keep default datetime parsing.

keep_default_dates = True (default) +

keep_default_dates = False +

When set to True, a column is converted to dates only if its label ends with "_at", ends with "_time", begins with "timestamp", is equal to "date", or is equal to "modified".

precise_float: bool, Optional-

Use precise floating-point conversion (False by default).

date_unit: str, Optional-

Unit of time for datetime parsing (e.g., 's', 'ms').

import pandas as pd
import json

# Build sample records whose Timestamp column holds epoch values in seconds
sample_json = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
    "Timestamp": [1065117600, 1050480000, 1703876400],
}

# Serialize the mapping and save it to disk
json_file_path = "sample_data.json"
serialized = json.dumps(sample_json, indent=2)
with open(json_file_path, mode="w") as out_file:
    out_file.write(serialized)

# Load the file, interpreting epoch timestamps as seconds
df = pd.read_json(json_file_path, date_unit='s')
print(df)
'''
Output:
    Name  Age           Timestamp
0  Alice   25 2003-10-02 18:00:00
1    Bob   30 2003-04-16 08:00:00
2  Chloe   22 2023-12-29 19:00:00
'''

encoding: str, Optional-

It specifies the character encoding to use when reading a JSON file.

import pandas as pd
import json

# Build the sample records as a column-oriented mapping
sample_json = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": ["25", "30", "22"],
}

# Serialize the mapping and save it to disk as UTF-8 text
json_file_path = "sample_data.json"
serialized = json.dumps(sample_json, indent=2)
with open(json_file_path, mode="w", encoding="utf-8") as out_file:
    out_file.write(serialized)

# Load the file, decoding its bytes with the same encoding it was written in
df = pd.read_json(json_file_path, encoding="utf-8")
print(df)
'''
Output:
    Name  Age
0  Alice   25
1    Bob   30
2  Chloe   22
'''

encoding_errors: str, Optional-

It allows you to control how encoding errors are handled during the reading process.

import pandas as pd
import json

# Create sample data that contains non-ASCII characters ('é' and 'ü')
sample_json = {
    "Name": ["Alice", "Bob", "Chloé"],
    "City": ["München", "Berlin", "Hamburg"]
}

# Use a city name with two non-ASCII characters so the effect of each
# error handler is easy to spot in the output
sample_json["City"][0] = "Münchën"  # valid text; 'ü' and 'ë' each take two bytes in UTF-8

# Write the JSON data to a file as UTF-8 (ensure_ascii=False keeps the
# non-ASCII characters as raw bytes instead of \uXXXX escapes)
json_file_path = "sample_data_with_invalid_encoding.json"
with open(json_file_path, "w", encoding="utf-8") as json_file:
    json.dump(sample_json, json_file, ensure_ascii=False, indent=2)

# Read the UTF-8 file as ASCII with encoding_errors='strict' (default):
# the first non-ASCII byte raises UnicodeDecodeError
try:
    df_strict = pd.read_json(json_file_path, encoding='ascii', encoding_errors='strict')
    print("With encoding_errors='strict':")
    print(df_strict)
except UnicodeDecodeError as e:
    print("Error with encoding_errors='strict':", e)

# Read the file with encoding_errors='ignore': undecodable bytes are dropped
df_ignore = pd.read_json(json_file_path, encoding='ascii', encoding_errors='ignore')
print("\nWith encoding_errors='ignore':")
print(df_ignore)

# Read the file with encoding_errors='replace': each undecodable byte
# becomes the replacement character
df_replace = pd.read_json(json_file_path, encoding='ascii', encoding_errors='replace')
print("\nWith encoding_errors='replace':")
print(df_replace)
'''
Output:
Error with encoding_errors='strict': 'ascii' codec can't decode byte 0xc3 in position 47: ordinal not in range(128)

With encoding_errors='ignore':
    Name     City
0  Alice    Mnchn
1    Bob   Berlin
2   Chlo  Hamburg

With encoding_errors='replace':
     Name       City
0   Alice  M��nch��n
1     Bob     Berlin
2  Chlo��    Hamburg
'''

lines: bool, Optional-

It is used to specify whether the JSON data is structured as a series of newline-delimited JSON objects (also known as JSON Lines or NDJSON).

import pandas as pd
import json

# Sample data for JSON Lines format (one JSON object per line)
data = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Define the file path
json_file_path = "data.json"

# Write data to the file (one JSON object per line)
with open(json_file_path, "w") as json_file:
    for record in data:
        json.dump(record, json_file)
        json_file.write("\n")

# Now, read the JSON data back with lines=True so that every line is
# parsed as its own JSON object and becomes one row of the DataFrame
df = pd.read_json(json_file_path, lines=True)
print(df)
'''
Output:
    Name  Age
0  Alice   25
1    Bob   30
2  Chloe   22
'''

When lines=False (the default), the whole input is parsed as a single regular JSON document, so a JSON Lines file must be read with lines=True.

chunksize: None, Optional-

Number of rows per chunk when using an iterator.

import pandas as pd
import json

# Sample data for JSON Lines format (one JSON object per line)
data = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22}
]

# Define the file path
json_file_path = "data.json"

# Write data to the file (one JSON object per line)
with open(json_file_path, "w") as json_file:
    for record in data:
        json.dump(record, json_file)
        json_file.write("\n")

# With chunksize set, read_json returns a JsonReader iterator rather than a
# DataFrame. Use it as a context manager so the underlying file handle is
# closed once reading is finished.
with pd.read_json(json_file_path, lines=True, chunksize=2) as reader:
    # Each item produced by the reader is a DataFrame of up to `chunksize` rows
    df = next(reader)
print(df)
'''
Output:
    Name  Age
0  Alice   25
1    Bob   30
'''

Works only when lines=True.

compression: str, Optional-

It specifies the type of compression used for reading a file.

import pandas as pd
import json

# Build a small DataFrame to round-trip through a compressed file
data = {
    "Name": ["Alice", "Bob", "Chloe"],
    "Age": [25, 30, 22],
    "City": ["New York", "Berlin", "Hamburg"],
}
df = pd.DataFrame(data)

# Save it as gzip-compressed JSON Lines (one record per line)
json_file_path = "data.json.gz"
df.to_json(
    json_file_path,
    orient='records',
    lines=True,
    compression='gzip',
)

# Load the compressed file back, stating the compression explicitly
df_read = pd.read_json(json_file_path, lines=True, compression='gzip')
print(df_read)
'''
Output:
    Name  Age      City
0  Alice   25  New York
1    Bob   30    Berlin
2  Chloe   22   Hamburg
'''

DataFrame.to_json() is used to write a DataFrame to a JSON file.

nrows: None, Optional-

It is used to limit the number of rows that are read from the JSON.

import pandas as pd
import json

# Records to be stored in JSON Lines form (one object per line)
data = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22},
]

# Where the JSON Lines file is stored
json_file_path = "data.json"

# Emit each record as its own newline-terminated JSON document
with open(json_file_path, mode="w") as out_file:
    out_file.writelines(json.dumps(row) + "\n" for row in data)

# Load only the first two lines of the file
df = pd.read_json(json_file_path, nrows=2, lines=True)
print(df)
'''
Output:
    Name  Age
0  Alice   25
1    Bob   30
'''

Works only when lines=True.

storage_options: dict, Optional-

Dictionary of storage-specific options, such as credentials for cloud storage.

dtype_backend: None, Optional-

The dtype_backend parameter, introduced in pandas 2.0, specifies which backend data type ('numpy_nullable' or 'pyarrow') is used for the columns of the returned object.

engine: ('ujson' or 'pyarrow'), Optional-

It specifies the underlying parsing engine to use when reading the file. Default 'ujson'.

import pandas as pd
import json

# Records to be stored in JSON Lines form (one object per line)
data = [
    {"Name": "Alice", "Age": 25},
    {"Name": "Bob", "Age": 30},
    {"Name": "Chloe", "Age": 22},
]

# Where the JSON Lines file is stored
json_file_path = "data.json"

# Emit each record as its own newline-terminated JSON document
with open(json_file_path, mode="w") as out_file:
    out_file.writelines(json.dumps(row) + "\n" for row in data)

# Load the file line by line, naming the parser engine explicitly
df = pd.read_json(json_file_path, engine='ujson', lines=True)
print(df)
'''
Output:
    Name  Age
0  Alice   25
1    Bob   30
2  Chloe   22
'''

When reading a file, pandas uses one of these engines to parse the content. The ujson engine is lightweight and is the default, while the pyarrow engine is heavier, supports more advanced data operations beyond parsing and serializing JSON, and can only be used when lines=True.


Logo

BetterDocs

Support

EmailDiscordForms

Documentations

Python

Company

AboutDocs

Policies

Terms of ServicePrivacy Policy