pandas-dev · jbrockmendel · Mar 12, 2026 · Mar 19, 2026 · Mar 22, 2026
diff --git a/doc/source/whatsnew/v3.1.0.rst b/doc/source/whatsnew/v3.1.0.rst
@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :meth:`.DataFrameGroupBy.agg` now allows for the provided ``func`` to return a NumPy array (:issue:`63957`)
 - Added :meth:`ExtensionArray.count` (:issue:`64450`)
+- Added ``observed`` parameter to :func:`crosstab` (:issue:`53521`)
 - Display formatting for float sequences in DataFrame cells now respects the ``display.precision`` option (:issue:`60503`).
 - Improved the precision of float parsing in :func:`read_csv` (:issue:`64395`)
 - Improved the string ``repr`` of :class:`pd.core.arrays.SparseArray` (:issue:`64547`)
@@ -101,6 +102,7 @@ Deprecations
 - Deprecated automatic dtype promotion when reindexing with a ``fill_value`` that cannot be held by the original dtype. Explicitly cast to a common dtype instead (:issue:`53910`)
 - Deprecated passing unnecessary ``*args`` and ``**kwargs`` to :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.cummax`, :meth:`.SeriesGroupBy.skew`, :meth:`.DataFrameGroupBy.skew`, :meth:`.SeriesGroupBy.take`, and :meth:`.DataFrameGroupBy.take`. The ``skipna`` parameter for the cum* methods is now an explicit keyword argument (:issue:`50407`)
 - Deprecated the ``.name`` property of offset objects (e.g., :class:`~pandas.tseries.offsets.Day`, :class:`~pandas.tseries.offsets.Hour`). Use ``.rule_code`` instead (:issue:`64207`)
+- Deprecated the ``dropna`` keyword in :func:`pivot_table` and :func:`crosstab`. Manually handle NA values before and after calling these functions instead (:issue:`53521`)
 - Deprecated the ``xlrd`` and ``pyxlsb`` engines in :func:`read_excel`. Use ``engine="calamine"`` instead (:issue:`56542`)
 - Deprecated the default value of ``exact`` in :func:`assert_index_equal`; in a future version this will default to ``True`` instead of "equiv" (:issue:`57436`)
 -
@@ -223,7 +225,9 @@ Groupby/resample/rolling
 
 Reshaping
 ^^^^^^^^^
+- Bug in :func:`crosstab` where the inclusion of unobserved categories could not be controlled independently and was instead tied to the ``dropna`` keyword (:issue:`53521`)
 - Bug in :func:`merge` where merging on a :class:`MultiIndex` containing ``NaN`` values mapped ``NaN`` keys to the last level value instead of ``NaN`` (:issue:`64492`)
+- Bug in :func:`pivot_table` where the ``observed`` parameter was ignored when computing margins and the value of ``dropna`` was used instead (:issue:`53521`)
 - In :func:`pivot_table`, when ``values`` is empty, the aggregation will be computed on a Series of all NA values (:issue:`46475`)
 -
 

diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -757,11 +757,10 @@ cdef class {{name}}HashTable(HashTable):
                         append_data_uint8(rmd, 1)
                         continue
 
-                k = kh_get_{{dtype}}(self.table, val)
+                k = kh_put_{{dtype}}(self.table, val, &ret)
 
-                if k == self.table.n_buckets:
+                if ret != 0:
                     # k hasn't been seen yet
-                    k = kh_put_{{dtype}}(self.table, val, &ret)
 
                     if needs_resize(ud.size, ud.capacity):
                         with gil:
@@ -899,12 +898,8 @@ cdef class {{name}}HashTable(HashTable):
                     labels[i] = -1
                     continue
 
-                k = kh_get_{{dtype}}(self.table, val)
-                if k != self.table.n_buckets:
-                    idx = self.table.vals[k]
-                    labels[i] = idx
-                else:
-                    k = kh_put_{{dtype}}(self.table, val, &ret)
+                k = kh_put_{{dtype}}(self.table, val, &ret)
+                if ret != 0:
                     self.table.vals[k] = count
 
                     if needs_resize(ud.size, ud.capacity):
@@ -913,6 +908,9 @@ cdef class {{name}}HashTable(HashTable):
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
                     count += 1
+                else:
+                    idx = self.table.vals[k]
+                    labels[i] = idx
 
         arr_uniques = uniques.to_array()
 
@@ -1221,10 +1219,9 @@ cdef class StringHashTable(HashTable):
                     continue
 
                 v = vecs[i]
-                k = kh_get_str(self.table, v)
-                if k == self.table.n_buckets:
+                k = kh_put_str(self.table, v, &ret)
+                if ret != 0:
                     # k hasn't been seen yet
-                    k = kh_put_str(self.table, v, &ret)
                     uindexer[count] = i
                     if return_inverse:
                         self.table.vals[k] = count
@@ -1494,10 +1491,9 @@ cdef class PyObjectHashTable(HashTable):
                 labels[i] = na_sentinel
                 continue
 
-            k = kh_get_pymap(self.table, <PyObject*>val)
-            if k == self.table.n_buckets:
+            k = kh_put_pymap(self.table, <PyObject*>val, &ret)
+            if ret != 0:
                 # k hasn't been seen yet
-                k = kh_put_pymap(self.table, <PyObject*>val, &ret)
                 uniques.append(val)
                 if return_inverse:
                     self.table.vals[k] = count

diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in
@@ -61,18 +61,17 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
     if uses_mask:
         raise NotImplementedError("uses_mask not implemented with object dtype")
 
-    kh_resize_{{ttype}}(table, n // 10)
+    kh_resize_{{ttype}}(table, n)
 
     for i in range(n):
         val = values[i]
         if not dropna or not checknull(val):
-            k = kh_get_{{ttype}}(table, {{to_c_type}}val)
-            if k != table.n_buckets:
-                table.vals[k] += 1
-            else:
-                k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
+            k = kh_put_{{ttype}}(table, {{to_c_type}}val, &ret)
+            if ret != 0:
                 table.vals[k] = 1
                 result_keys.append(val)
+            else:
+                table.vals[k] += 1
     {{else}}
     kh_resize_{{ttype}}(table, n)
 
@@ -90,13 +89,12 @@ cdef value_count_{{dtype}}(const {{dtype}}_t[:] values, bint dropna, const uint8
             if uses_mask and isna_entry:
                 na_counter += 1
             else:
-                k = kh_get_{{ttype}}(table, val)
-                if k != table.n_buckets:
-                    table.vals[k] += 1
-                else:
-                    k = kh_put_{{ttype}}(table, val, &ret)
+                k = kh_put_{{ttype}}(table, val, &ret)
+                if ret != 0:
                     table.vals[k] = 1
                     result_keys.append(val)
+                else:
+                    table.vals[k] += 1
     {{endif}}
 
     # collect counts in the order corresponding to result_keys:
@@ -193,14 +191,13 @@ cdef duplicated_{{dtype}}(const {{dtype}}_t[:] values, object keep='first', cons
 
                 else:
                     value = {{to_c_type}}(values[i])
-                    k = kh_get_{{ttype}}(table, value)
-                    if k != table.n_buckets:
-                        out[table.vals[k]] = 1
-                        out[i] = 1
-                    else:
-                        k = kh_put_{{ttype}}(table, value, &ret)
+                    k = kh_put_{{ttype}}(table, value, &ret)
+                    if ret != 0:
                         table.vals[k] = i
                         out[i] = 0
+                    else:
+                        out[table.vals[k]] = 1
+                        out[i] = 1
 
     kh_destroy_{{ttype}}(table)
     return out

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -12806,7 +12806,7 @@ def pivot_table(
         aggfunc: AggFuncType = "mean",
         fill_value=None,
         margins: bool = False,
-        dropna: bool = True,
+        dropna: bool | lib.NoDefault = lib.no_default,
         margins_name: Level = "All",
         observed: bool = True,
         sort: bool = True,
@@ -12854,6 +12854,10 @@ def pivot_table(
             * index/column keys containing NA values will be dropped (see ``dropna``
               parameter in :meth:`DataFrame.groupby`).
 
+            .. deprecated:: 3.1.0
+                The ``dropna`` keyword is deprecated. Manually handle NA values
+                before and after calling ``pivot_table``.
+
         margins_name : str, default 'All'
             Name of the row / column that will contain the totals
             when margins is True.

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -6,11 +6,14 @@
     Literal,
     cast,
 )
+import warnings
 
 import numpy as np
 
 from pandas._libs import lib
+from pandas.errors import Pandas4Warning
 from pandas.util._decorators import set_module
+from pandas.util._exceptions import find_stack_level
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
@@ -64,7 +67,7 @@ def pivot_table(
     aggfunc: AggFuncType = "mean",
     fill_value=None,
     margins: bool = False,
-    dropna: bool = True,
+    dropna: bool | lib.NoDefault = lib.no_default,
     margins_name: Hashable = "All",
     observed: bool = True,
     sort: bool = True,
@@ -113,6 +116,10 @@ def pivot_table(
         * index/column keys containing NA values will be dropped (see ``dropna``
           parameter in :meth:``DataFrame.groupby``).
 
+        .. deprecated:: 3.1.0
+            The ``dropna`` keyword is deprecated. Manually handle NA values
+            before and after calling ``pivot_table``.
+
     margins_name : str, default 'All'
         Name of the row / column that will contain the totals
         when margins is True.
@@ -246,6 +253,17 @@ def pivot_table(
     foo large  2.000000   5  4.500000    4
         small  2.333333   6  4.333333    2
     """
+    if dropna is not lib.no_default:
+        warnings.warn(
+            "The dropna keyword in pivot_table is deprecated and will be "
+            "removed in a future version. Manually handle NA values before "
+            "and after calling pivot_table.",
+            Pandas4Warning,
+            stacklevel=find_stack_level(),
+        )
+    else:
+        dropna = True
+
     index = _convert_by(index)
     columns = _convert_by(columns)
 
@@ -407,7 +425,7 @@ def __internal_pivot_table(
             cols=columns,
             aggfunc=aggfunc,
             kwargs=kwargs,
-            observed=dropna,
+            observed=observed,
             margins_name=margins_name,
             fill_value=fill_value,
             dropna=dropna,
@@ -960,8 +978,9 @@ def crosstab(
     aggfunc=None,
     margins: bool = False,
     margins_name: Hashable = "All",
-    dropna: bool = True,
+    dropna: bool | lib.NoDefault = lib.no_default,
     normalize: bool | Literal[0, 1, "all", "index", "columns"] = False,
+    observed: bool = True,
 ) -> DataFrame:
     """
     Compute a simple cross tabulation of two (or more) factors.
@@ -991,6 +1010,11 @@ def crosstab(
         when margins is True.
     dropna : bool, default True
         Do not include columns whose entries are all NaN.
+
+        .. deprecated:: 3.1.0
+            The ``dropna`` keyword is deprecated. Manually handle NA values
+            before and after calling ``crosstab``.
+
     normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False
         Normalize by dividing all values by the sum of values.
 
@@ -999,6 +1023,13 @@ def crosstab(
         - If passed 'columns' will normalize over each column.
         - If margins is `True`, will also normalize margin values.
 
+    observed : bool, default True
+        This only applies if any of the groupers are Categoricals.
+        If True: only show observed values for categorical groupers.
+        If False: show all values for categorical groupers.
+
+        .. versionadded:: 3.1.0
+
     Returns
     -------
     DataFrame
@@ -1098,6 +1129,15 @@ def crosstab(
     b      0  1  0
     c      0  0  0
     """
+    if dropna is not lib.no_default:
+        warnings.warn(
+            "The dropna keyword in crosstab is deprecated and will be "
+            "removed in a future version. Manually handle NA values before "
+            "and after calling crosstab.",
+            Pandas4Warning,
+            stacklevel=find_stack_level(),
+        )
+
     if values is None and aggfunc is not None:
         raise ValueError("aggfunc cannot be used without values.")
 
@@ -1149,7 +1189,7 @@ def crosstab(
         margins=margins,
         margins_name=margins_name,
         dropna=dropna,
-        observed=dropna,
+        observed=observed,
         **kwargs,  # type: ignore[arg-type]
     )