BetterDocs
Home
Docs

Creation | pd.read_sql()

Method:

pd.read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None, dtype_backend=<no_default>, dtype=None)

Reads SQL query or table into a DataFrame.

Returns:

pandas.core.frame.DataFrame

Parameters:

sql: (SQLAlchemy Selectable or str)-

SQL query or table name to execute.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

df = pd.read_sql(sql="SELECT * FROM emp", con=conn)
print(df)
conn.close()
'''
Output:
   id   name   salary
0   1  Alice  75000.5
1   2    Bob  80000.0
2   3  Chloe  85000.0
'''

con: (SQLAlchemy connectable or str)-

SQLAlchemy connection engine or database connection.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

df = pd.read_sql(sql="SELECT * FROM emp", con=conn)
print(df)
conn.close()
'''
Output:
   id   name   salary
0   1  Alice  75000.5
1   2    Bob  80000.0
2   3  Chloe  85000.0
'''

index_col: str, Optional-

Column(s) to set as the DataFrame index.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE emp (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO emp (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

df = pd.read_sql(sql="SELECT * FROM emp", con=conn, index_col="id")
print(df)
conn.close()
'''
Output:
     name   salary
id                
1   Alice  75000.5
2     Bob  80000.0
3   Chloe  85000.0
'''

coerce_float: bool, Optional-

It is used to control whether non-numeric data that can be interpreted as numeric values should be converted to float type when reading data from the SQL table into a DataFrame.

params: None, Optional-

It allows you to safely pass values to a SQL query as parameters.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

# Query with parameters using ?
query = "SELECT id, name, salary FROM employees WHERE salary > ?"
df = pd.read_sql(sql=query, con=conn, params=[78000.0])
conn.close()
print(df)
'''
Output:
   id   name   salary
0   2    Bob  80000.0
1   3  Chloe  85000.0
'''

parse_dates: None, Optional-

Columns to parse as dates.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()

# Create a table
cursor.execute('CREATE TABLE employees (id INTEGER PRIMARY KEY, name TEXT, salary REAL, date TEXT)')
cursor.executemany('INSERT INTO employees (name, salary, date) VALUES (?, ?, ?)', [('Alice', 75000.5, "2003-10-02"), ('Bob', 80000.0, '2003-04-16'), ('Chloe', 85000.0, '2023-12-29')])
conn.commit()

# Use pandas.read_sql to fetch data
df = pd.read_sql(sql="SELECT id, name, salary, date FROM employees", con=conn, parse_dates=["date"])
conn.close()
print(df['date'])
'''
Output:
0   2003-10-02
1   2003-04-16
2   2023-12-29
Name: date, dtype: datetime64[ns]
'''

columns: None, Optional-

List of specific columns to read.

import sqlite3
import pandas as pd
from sqlalchemy import create_engine

# Create a SQLite database connection
conn = sqlite3.connect('example.db')
cursor = conn.cursor()

# Create a sample table
cursor.execute('CREATE TABLE IF NOT EXISTS employee (id INTEGER PRIMARY KEY, name TEXT, salary INTEGER, department TEXT)')

# Insert sample data into the table
cursor.executemany('INSERT INTO employee (name, salary, department) VALUES (?, ?, ?)', [('Alice', 75000, 'HR'), ('Bob', 80000, 'Engineering'), ('Chloe', 85000, 'Marketing')])
conn.commit()
conn.close()

# Create SQLAlchemy engine for pandas to use
engine = create_engine('sqlite:///example.db')

# NOTE: read_sql only works with SQLAlchemy engines, not sqlite3 directly
df = pd.read_sql(sql='employee', con=engine, columns=["id", 'name'])
print(df)
'''
Output:
   id   name
0   1  Alice
1   2    Bob
2   3  Chloe
'''

columns only works when sql is a table and not a query.

chunksize: None, Optional-

Number of rows per chunk for iteration.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

df = pd.read_sql(sql="SELECT * FROM employees", con=conn, chunksize=2)
print(next(df))
conn.close()
'''
Output:
   id   name   salary
0   1  Alice  75000.5
1   2    Bob  80000.0
'''

The connection must always be closed after iterating through all the chunks from the DataFrame Iterator.

dtype_backend: None, Optional-

The dtype_backend parameter is new in Pandas 2.0 which is used to specify the backend for handling the types of data when reading a file.

dtype: data-type, Optional-

Dictionary specifying data types for specific columns.

import sqlite3
import pandas as pd

# Create an in-memory SQLite database and sample data
conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE employees (id INTEGER, name TEXT, salary REAL)')
cursor.executemany('INSERT INTO employees (id, name, salary) VALUES (?, ?, ?)', [(1, 'Alice', 75000.5), (2, 'Bob', 80000.0), (3, 'Chloe', 85000.0)])
conn.commit()

df = pd.read_sql(sql="SELECT id, name FROM employees", con=conn, dtype={"id": "float16"})
print(df)
conn.close()
'''
Output:
    id   name
0  1.0  Alice
1  2.0    Bob
2  3.0  Chloe
'''

Values: +


Logo

BetterDocs

Support

EmailDiscordForms

Documentations

Python

Company

AboutDocs

Policies

Terms of ServicePrivacy Policy