posit-dev · cpsievert · Dec 19, 2025 · Dec 19, 2025 · cpsievert · Dec 19, 2025
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -59,6 +59,14 @@ make py-build
 make py-docs
 ```
 
+Before finishing your implementation or committing any code, you should run:
+
+```bash
+uv run ruff check --fix pkg-py --config pyproject.toml
+```
+
+This helps ensure the code adheres to project standards.
+
 ### R Package
 
 ```bash

diff --git a/pkg-py/CHANGELOG.md b/pkg-py/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [UNRELEASED]
 
+### Breaking Changes
+
+* Methods like `execute_query()`, `get_data()`, and `df()` now return a `narwhals.DataFrame` instead of a `pandas.DataFrame`. This allows querychat to drop its `pandas` dependency and lets you use any `narwhals`-compatible dataframe of your choosing.
+  * If this breaks existing code, note you can call `.to_native()` on the new dataframe value to get the underlying dataframe back. This is a `pandas` or `polars` dataframe, depending on your data and on which library is installed; call `.to_pandas()` instead if you specifically need `pandas`.
+  * Note that `polars` or `pandas` will be needed to realize a `sqlalchemy` connection query as a dataframe. Install with `pip install querychat[pandas]` or `pip install querychat[polars]`.
+
 ### New features
 
 * `QueryChat.sidebar()`, `QueryChat.ui()`, and `QueryChat.server()` now support an optional `id` parameter to create multiple chat instances from a single `QueryChat` object. (#172)

diff --git a/pkg-py/docs/build.qmd b/pkg-py/docs/build.qmd
@@ -203,7 +203,7 @@ with ui.layout_columns():
 
         @render_plotly
         def survival_plot():
-            d = qc.df()
+            d = qc.df().to_native()  # Convert for pandas groupby()
             summary = d.groupby('pclass')['survived'].mean().reset_index()
             return px.bar(summary, x='pclass', y='survived')
 ```
@@ -271,7 +271,7 @@ with ui.layout_columns():
 
         @render_plotly
         def survival_by_class():
-            df = qc.df()
+            df = qc.df().to_native()  # Convert for pandas groupby()
             summary = df.groupby('pclass')['survived'].mean().reset_index()
             return px.bar(
                 summary,
@@ -286,16 +286,14 @@ with ui.layout_columns():
 
         @render_plotly
         def age_dist():
-            df = qc.df()
-            return px.histogram(df, x='age', nbins=30)
+            return px.histogram(qc.df(), x='age', nbins=30)
 
     with ui.card():
         ui.card_header("Fare by Class")
 
         @render_plotly
         def fare_by_class():
-            df = qc.df()
-            return px.box(df, x='pclass', y='fare', color='survived')
+            return px.box(qc.df(), x='pclass', y='fare', color='survived')
 
 ui.page_opts(
     title="Titanic Survival Analysis",
@@ -461,7 +459,7 @@ with ui.layout_columns():
 
         @render.plot
         def survival_by_class():
-            df = qc.df()
+            df = qc.df().to_native()  # Convert for pandas groupby()
             summary = df.groupby('pclass')['survived'].mean().reset_index()
             fig = px.bar(
                 summary,
@@ -477,18 +475,14 @@ with ui.layout_columns():
 
         @render.plot
         def age_dist():
-            df = qc.df()
-            fig = px.histogram(df, x='age', nbins=30)
-            return fig
+            return px.histogram(qc.df(), x='age', nbins=30)
 
     with ui.card():
         ui.card_header("Fare by Class")
 
         @render.plot
         def fare_by_class():
-            df = qc.df()
-            fig = px.box(df, x='pclass', y='fare', color='survived')
-            return fig
+            return px.box(qc.df(), x='pclass', y='fare', color='survived')
 
 # Reset button handler
 @reactive.effect

diff --git a/pkg-py/docs/data-sources.qmd b/pkg-py/docs/data-sources.qmd
@@ -63,7 +63,7 @@ app = qc.app()
 
 :::
 
-If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `pandas.DataFrame` by default. 
+If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `narwhals.DataFrame`. Call `.to_native()` on the result to get the underlying pandas or polars DataFrame. 
 
 ## Databases
 

diff --git a/pkg-py/src/querychat/_datasource.py b/pkg-py/src/querychat/_datasource.py
@@ -5,12 +5,12 @@
 
 import duckdb
 import narwhals.stable.v1 as nw
-import pandas as pd
 from sqlalchemy import inspect, text
 from sqlalchemy.sql import sqltypes
 
+from ._df_compat import duckdb_result_to_nw, read_sql
+
 if TYPE_CHECKING:
-    from narwhals.stable.v1.typing import IntoFrame
     from sqlalchemy.engine import Connection, Engine
 
 
@@ -53,7 +53,7 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         ...
 
     @abstractmethod
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
@@ -65,20 +65,20 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as a pandas DataFrame
+            Query results as a narwhals DataFrame
 
         """
         ...
 
     @abstractmethod
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         ...
@@ -99,27 +99,26 @@ def cleanup(self) -> None:
 
 
 class DataFrameSource(DataSource):
-    """A DataSource implementation that wraps a pandas DataFrame using DuckDB."""
+    """A DataSource implementation that wraps a DataFrame using DuckDB."""
 
-    _df: nw.DataFrame | nw.LazyFrame
+    _df: nw.DataFrame
 
-    def __init__(self, df: IntoFrame, table_name: str):
+    def __init__(self, df: nw.DataFrame, table_name: str):
         """
-        Initialize with a pandas DataFrame.
+        Initialize with a DataFrame.
 
         Parameters
         ----------
         df
-            The DataFrame to wrap
+            The DataFrame to wrap (pandas, polars, or any narwhals-compatible frame)
         table_name
             Name of the table in SQL queries
 
         """
         self._conn = duckdb.connect(database=":memory:")
-        self._df = nw.from_native(df)
+        self._df = nw.from_native(df) if not isinstance(df, nw.DataFrame) else df
         self.table_name = table_name
-        # TODO(@gadenbuie): If the data frame is already SQL-backed, maybe we shouldn't be making a new copy here.
-        self._conn.register(table_name, self._df.lazy().collect().to_pandas())
+        self._conn.register(table_name, self._df.to_native())
 
     def get_db_type(self) -> str:
         """
@@ -151,16 +150,8 @@ def get_schema(self, *, categorical_threshold: int) -> str:
         """
         schema = [f"Table: {self.table_name}", "Columns:"]
 
-        # Ensure we're working with a DataFrame, not a LazyFrame
-        ndf = (
-            self._df.head(10).collect()
-            if isinstance(self._df, nw.LazyFrame)
-            else self._df
-        )
-
-        for column in ndf.columns:
-            # Map pandas dtypes to SQL-like types
-            dtype = ndf[column].dtype
+        for column in self._df.columns:
+            dtype = self._df[column].dtype
             if dtype.is_integer():
                 sql_type = "INTEGER"
             elif dtype.is_float():
@@ -176,17 +167,14 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
             column_info = [f"- {column} ({sql_type})"]
 
-            # For TEXT columns, check if they're categorical
             if sql_type == "TEXT":
-                unique_values = ndf[column].drop_nulls().unique()
+                unique_values = self._df[column].drop_nulls().unique()
                 if unique_values.len() <= categorical_threshold:
                     categories = unique_values.to_list()
                     categories_str = ", ".join([f"'{c}'" for c in categories])
                     column_info.append(f"  Categorical values: {categories_str}")
-
-            # For numeric columns, include range
             elif sql_type in ["INTEGER", "FLOAT", "DATE", "TIME"]:
-                rng = ndf[column].min(), ndf[column].max()
+                rng = self._df[column].min(), self._df[column].max()
                 if rng[0] is None and rng[1] is None:
                     column_info.append("  Range: NULL to NULL")
                 else:
@@ -196,10 +184,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute query using DuckDB.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -208,23 +198,22 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         """
-        return self._conn.execute(query).df()
+        return duckdb_result_to_nw(self._conn.execute(query))
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
-        # TODO(@gadenbuie): This should just return `self._df` and not a pandas DataFrame
-        return self._df.lazy().collect().to_pandas()
+        return self._df
 
     def cleanup(self) -> None:
         """
@@ -412,10 +401,12 @@ def get_schema(self, *, categorical_threshold: int) -> str:  # noqa: PLR0912
 
         return "\n".join(schema)
 
-    def execute_query(self, query: str) -> pd.DataFrame:
+    def execute_query(self, query: str) -> nw.DataFrame:
         """
         Execute SQL query and return results as DataFrame.
 
+        Uses polars if available, otherwise falls back to pandas.
+
         Parameters
         ----------
         query
@@ -424,20 +415,20 @@ def execute_query(self, query: str) -> pd.DataFrame:
         Returns
         -------
         :
-            Query results as pandas DataFrame
+            Query results as narwhals DataFrame
 
         """
         with self._get_connection() as conn:
-            return pd.read_sql_query(text(query), conn)
+            return read_sql(text(query), conn)
 
-    def get_data(self) -> pd.DataFrame:
+    def get_data(self) -> nw.DataFrame:
         """
         Return the unfiltered data as a DataFrame.
 
         Returns
         -------
         :
-            The complete dataset as a pandas DataFrame
+            The complete dataset as a narwhals DataFrame
 
         """
         return self.execute_query(f"SELECT * FROM {self.table_name}")

diff --git a/pkg-py/src/querychat/_df_compat.py b/pkg-py/src/querychat/_df_compat.py
@@ -0,0 +1,74 @@
+"""
+DataFrame compatibility: try polars first, fall back to pandas.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import narwhals.stable.v1 as nw
+
+if TYPE_CHECKING:
+    import duckdb
+    from sqlalchemy.engine import Connection
+    from sqlalchemy.sql.elements import TextClause
+
+_INSTALL_MSG = "Install one with: pip install polars  OR  pip install pandas"
+
+
+def read_sql(query: TextClause, conn: Connection) -> nw.DataFrame:
+    """
+    Run a SQL query on a SQLAlchemy connection and wrap the result in narwhals.
+
+    Tries polars first and falls back to pandas if the polars attempt fails
+    for any reason.
+
+    Parameters
+    ----------
+    query
+        The SQL statement to execute.
+    conn
+        The SQLAlchemy connection to run the query on.
+
+    Returns
+    -------
+    :
+        Query results as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If the polars attempt failed and the pandas attempt raised
+        ``ImportError``. The polars failure is attached as ``__cause__`` so
+        the root cause is not lost.
+
+    """
+    polars_error: Exception | None = None
+    try:
+        import polars as pl  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        return nw.from_native(pl.read_database(query, connection=conn))
+    except Exception as err:
+        # Catches ImportError for polars, and other errors (e.g., missing pyarrow).
+        # Intentional best-effort fallback to pandas; the error is kept only so
+        # it can be reported if pandas turns out to be unavailable as well.
+        polars_error = err
+
+    try:
+        import pandas as pd  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        # NOTE: this issues the query again on the same connection.
+        return nw.from_native(pd.read_sql_query(query, conn))
+    except ImportError:
+        pass
+
+    raise ImportError(
+        f"SQLAlchemySource requires 'polars' or 'pandas'. {_INSTALL_MSG}"
+    ) from polars_error
+
+
+def duckdb_result_to_nw(
+    result: duckdb.DuckDBPyRelation | duckdb.DuckDBPyConnection,
+) -> nw.DataFrame:
+    """
+    Convert a DuckDB result into a narwhals DataFrame.
+
+    Tries ``result.pl()`` (polars) first and falls back to ``result.df()``
+    (pandas) if the polars attempt fails for any reason.
+
+    Parameters
+    ----------
+    result
+        The DuckDB relation or connection holding the query result.
+
+    Returns
+    -------
+    :
+        Query results as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If the polars attempt failed and the pandas attempt raised
+        ``ImportError``. The polars failure is attached as ``__cause__`` so
+        the root cause is not lost.
+
+    """
+    polars_error: Exception | None = None
+    try:
+        return nw.from_native(result.pl())
+    except Exception as err:
+        # Catches ImportError for polars, and other errors (e.g., missing pyarrow).
+        # Intentional best-effort fallback to pandas; the error is kept only so
+        # it can be reported if pandas turns out to be unavailable as well.
+        polars_error = err
+
+    try:
+        return nw.from_native(result.df())
+    except ImportError:
+        pass
+
+    raise ImportError(
+        f"DataFrameSource requires 'polars' or 'pandas'. {_INSTALL_MSG}"
+    ) from polars_error
+
+
+def read_csv(path: str) -> nw.DataFrame:
+    """
+    Read a CSV file into a narwhals DataFrame.
+
+    Tries polars first and falls back to pandas if the polars attempt fails
+    for any reason. The pandas fallback reads the file as gzip first and, if
+    the file turns out not to be gzip-compressed, retries with pandas' default
+    compression handling so plain CSV files load in this branch too.
+
+    Parameters
+    ----------
+    path
+        Location of the CSV file to read.
+
+    Returns
+    -------
+    :
+        The file contents as a narwhals DataFrame
+
+    Raises
+    ------
+    ImportError
+        If the polars attempt failed and the pandas attempt raised
+        ``ImportError``. The polars failure is attached as ``__cause__`` so
+        the root cause is not lost.
+
+    """
+    polars_error: Exception | None = None
+    try:
+        import polars as pl  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        return nw.from_native(pl.read_csv(path))
+    except Exception as err:
+        # Catches ImportError for polars, and other errors (e.g., missing pyarrow).
+        # Intentional best-effort fallback to pandas; the error is kept only so
+        # it can be reported if pandas turns out to be unavailable as well.
+        polars_error = err
+
+    try:
+        import gzip  # noqa: PLC0415
+
+        import pandas as pd  # noqa: PLC0415  # pyright: ignore[reportMissingImports]
+
+        try:
+            # Gzip first: unchanged behavior for every file that loaded before.
+            frame = pd.read_csv(path, compression="gzip")
+        except gzip.BadGzipFile:
+            # Not gzip-compressed: let pandas infer the compression (if any).
+            frame = pd.read_csv(path)
+        return nw.from_native(frame)
+    except ImportError:
+        pass
+
+    raise ImportError(
+        f"Loading data requires 'polars' or 'pandas'. {_INSTALL_MSG}"
+    ) from polars_error
-Original file line number
+Diff line change
@@ Expand Up / @@ -63,7 +63,7 @@ app = qc.app() @@
     :::
-    If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `pandas.DataFrame` by default.
+    If you're [building an app](build.qmd), note you can read the queried data frame reactively using the `df()` method, which returns a `narwhals.DataFrame`. Call `.to_native()` on the result to get the underlying pandas or polars DataFrame.
     ## Databases
@@ Expand Down @@