SQL query or table name to execute.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
df = pd.read_sql(sql="SELECT * FROM emp", con=conn)
print(df)
conn.close()
'''
Output:
id name salary
0 1 Alice 75000.5
1 2 Bob 80000.0
2 3 Chloe 85000.0
'''
SQLAlchemy connection engine or database connection.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
df = pd.read_sql(sql="SELECT * FROM emp", con=conn)
print(df)
conn.close()
'''
Output:
id name salary
0 1 Alice 75000.5
1 2 Bob 80000.0
2 3 Chloe 85000.0
'''
Column(s) to set as the DataFrame index.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
df = pd.read_sql(sql="SELECT * FROM emp", con=conn, index_col="id")
print(df)
conn.close()
'''
Output:
name salary
id
1 Alice 75000.5
2 Bob 80000.0
3 Chloe 85000.0
'''
It is used to control whether non-numeric data that can be interpreted as numeric values should be converted to float type when reading data from the SQL table into a DataFrame.
It allows you to safely pass values to a SQL query as parameters.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
# Query with parameters using ?
query = "SELECT id, name, salary FROM employees WHERE salary > ?"
df = pd.read_sql(sql=query, con=conn, params=[78000.0])
conn.close()
print(df)
'''
Output:
id name salary
0 2 Bob 80000.0
1 3 Chloe 85000.0
'''
Columns to parse as dates.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
# Create a table
cursor.execute('CREATE TABLE employees (id INTEGER PRIMARY KEY, name TEXT, salary REAL, date TEXT)')
cursor.executemany('INSERT INTO employees (name, salary, date) VALUES (?, ?, ?)', [('Alice', 75000.5, "2003-10-02"), ('Bob', 80000.0, '2003-04-16'), ('Chloe', 85000.0, '2023-12-29')])
conn.commit()
# Use pandas.read_sql to fetch data
df = pd.read_sql(sql="SELECT id, name, salary, date FROM employees", con=conn, parse_dates=["date"])
conn.close()
print(df['date'])
'''
Output:
0 2003-10-02
1 2003-04-16
2 2023-12-29
Name: date, dtype: datetime64[ns]
'''
List of specific columns to read.
import sqlite3
import pandas as pd
from sqlalchemy import create_engine
# Create a SQLite database connection
conn = sqlite3.connect('example.db')
cursor = conn.cursor()
# Create a sample table
cursor.execute('CREATE TABLE IF NOT EXISTS employee (id INTEGER PRIMARY KEY, name TEXT, salary INTEGER, department TEXT)')
# Insert sample data into the table
cursor.executemany('INSERT INTO employee (name, salary, department) VALUES (?, ?, ?)', [('Alice', 75000, 'HR'), ('Bob', 80000, 'Engineering'), ('Chloe', 85000, 'Marketing')])
conn.commit()
conn.close()
# Create SQLAlchemy engine for pandas to use
engine = create_engine('sqlite:///example.db')
# NOTE: read_sql only works with SQLAlchemy engines, not sqlite3 directly
df = pd.read_sql(sql='employee', con=engine, columns=["id", 'name'])
print(df)
'''
Output:
id name
0 1 Alice
1 2 Bob
2 3 Chloe
'''
columns only works when sql is a table and not a query.
Number of rows per chunk for iteration.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
df = pd.read_sql(sql="SELECT * FROM employees", con=conn, chunksize=2)
print(next(df))
conn.close()
'''
Output:
id name salary
0 1 Alice 75000.5
1 2 Bob 80000.0
'''
The connection must always be closed after iterating through all the chunks from the DataFrame Iterator.
The dtype_backend parameter is new in Pandas 2.0 which is used to specify the backend for handling the types of data when reading a file.
Dictionary specifying data types for specific columns.
import sqlite3
import pandas as pd
# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()
df = pd.read_sql(sql="SELECT id, name FROM employees", con=conn, dtype={"id": "float16"})
print(df)
conn.close()
'''
Output:
id name
0 1.0 Alice
1 2.0 Bob
2 3.0 Chloe
'''
int8: 8-bit signed integer (range: -128 to 127).
int16: 16-bit signed integer (range: -32,768 to 32,767).
int32: 32-bit signed integer (range: -2,147,483,648 to 2,147,483,647).
int64: 64-bit signed integer (large integer range).
uint8: 8-bit unsigned integer (range: 0 to 255).
uint16: 16-bit unsigned integer (range: 0 to 65,535).
uint32: 32-bit unsigned integer (range: 0 to 4,294,967,295).
uint64: 64-bit unsigned integer (large positive integer range).
float16: Half precision floating-point (16-bit, for low-precision computations).
float32: Single precision floating-point (32-bit).
float64: Double precision floating-point (64-bit, the default float in NumPy).
float128: Extended precision floating-point (128-bit, availability depends on system).
complex64: Complex number represented by two 32-bit floats (for real and imaginary parts).
complex128: Complex number represented by two 64-bit floats (default complex dtype).
complex256: Complex number represented by two 128-bit floats (system-dependent).
bool: Boolean type, can be either True or False (stored as 1-bit but takes up a full byte).
str: Fixed-length Unicode string, specified by S + length (e.g., S10 for a 10-character string).
unicode: Fixed-length Unicode string with support for multiple characters (uses U).
object: Allows storing any Python object, including mixed types, strings, or other arrays. Useful for heterogeneous data but slower than native NumPy types.
datetime64: Stores dates and times with varying precisions (e.g., Y, M, D, h, m, s, ms, us, ns, ps, fs, as). Example: datetime64('2003-10-02')
timedelta64: Represents time durations with units (same units as datetime64).