diff --git a/CHANGES.rst b/CHANGES.rst index 208e02c3..7402120d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -2,6 +2,14 @@ Changes for crate ================= +Unreleased +================ + +- Fixed ``cursor.execute()`` with ``bulk_parameters`` and pyformat SQL: when + rows are dicts, both the SQL template and the rows are now converted to + positional format before sending to CrateDB. Positional-list rows + continue to work as before. + 2026/06/04 2.2.0 ========== - Added JSON serialization support for Python's ``datetime.time`` type, diff --git a/docs/by-example/cursor.rst b/docs/by-example/cursor.rst index ab676601..3dc6433a 100644 --- a/docs/by-example/cursor.rst +++ b/docs/by-example/cursor.rst @@ -266,6 +266,35 @@ For completeness' sake the cursor description is updated nonetheless: >>> [ desc[0] for desc in cursor.description ] ['name', 'position'] +executemany() with named parameters +==================================== + +``executemany()`` also accepts a :class:`py:list` of :class:`py:dict` when +the SQL uses ``%(name)s`` placeholders. The client converts both the SQL +template and all rows to positional format before sending to CrateDB: + + >>> connection.client.set_next_response({ + ... "results": [ + ... {"rowcount": 1}, + ... {"rowcount": 1} + ... ], + ... "duration": 123, + ... "cols": [], + ... }) + + >>> cursor = connection.cursor() + + >>> cursor.executemany( + ... "INSERT INTO t (id, val) VALUES (%(id)s, %(val)s)", + ... [{"id": 1, "val": "foo"}, {"id": 2, "val": "bar"}]) + [{'rowcount': 1}, {'rowcount': 1}] + + >>> cursor.rowcount + 2 + + >>> cursor.duration + 123 + >>> connection.client.set_next_response({ ... "rows":[ [ "North West Ripple", 1 ], [ "Arkintoofle Minor", 3 ], [ "Alpha Centauri", 3 ] ], ... "cols":[ "name", "position" ], diff --git a/docs/query.rst b/docs/query.rst index 5495e78d..53e80db6 100644 --- a/docs/query.rst +++ b/docs/query.rst @@ -72,20 +72,20 @@ The same parameter name may appear multiple times in the query: ... 
"SELECT * FROM locations WHERE name = %(q)s OR kind = %(q)s", ... {"q": "Quasar"}) -The client converts the ``%(name)s`` placeholders to positional ``?`` markers -before sending the query to CrateDB, so no server-side changes are required. - -.. NOTE:: - - Named parameters are not yet supported by ``executemany()``. Use - positional ``?`` placeholders with a :class:`py:list` of tuples for bulk - operations. +The client converts the ``%(name)s`` placeholders to ``$N`` positional +markers before sending the query to CrateDB, so no server-side changes are +required. Bulk inserts ------------ :ref:`Bulk inserts ` are possible with the -``executemany()`` method, which takes a :class:`py:list` of tuples to insert: +``executemany()`` method. + +Positional parameters +..................... + +Pass a :class:`py:list` of tuples using ``?`` placeholders: >>> cursor.executemany( ... "INSERT INTO locations (name, date, kind, position) VALUES (?, ?, ?, ?)", @@ -94,10 +94,56 @@ Bulk inserts [{'rowcount': 1}, {'rowcount': 1}] The ``executemany()`` method returns a result :class:`dictionary ` -for every tuple. This dictionary always has a ``rowcount`` key, indicating +for every row. This dictionary always has a ``rowcount`` key, indicating how many rows were inserted. If an error occurs, the ``rowcount`` value is ``-2``, and the dictionary may additionally have an ``error_message`` key. +Named parameters +................ + +``executemany()`` also accepts a :class:`py:list` of :class:`py:dict` using +``%(name)s`` placeholders. The client converts both the SQL template and all +rows to positional format before sending to CrateDB: + + >>> cursor.executemany( + ... "INSERT INTO locations (name, date, kind, position) " + ... "VALUES (%(name)s, %(date)s, %(kind)s, %(pos)s)", + ... [{"name": "Cloverleaf", "date": "2007-03-11", "kind": "Quasar", "pos": 7}, + ... 
{"name": "Old Faithful", "date": "2007-03-11", "kind": "Quasar", "pos": 8}]) + [{'rowcount': 1}, {'rowcount': 1}] + +Using ``bulk_parameters`` directly +................................... + +For advanced use cases (such as SQLAlchemy integrations) you can call +``execute()`` directly with the ``bulk_parameters`` keyword argument, +bypassing ``executemany()``: + + >>> cursor.execute( + ... "INSERT INTO locations (name, kind, position) VALUES (?, ?, ?)", + ... bulk_parameters=[('Cloverleaf', 'Quasar', 7), + ... ('Old Faithful', 'Quasar', 8)]) + +Named ``%(name)s`` placeholders are also supported. When the rows are +:class:`py:dict` objects, both the SQL template and the rows are converted, +exactly as in the ``executemany()`` path: + + >>> cursor.execute( + ... "INSERT INTO locations (name, kind, position) " + ... "VALUES (%(name)s, %(kind)s, %(pos)s)", + ... bulk_parameters=[{"name": "Cloverleaf", "kind": "Quasar", "pos": 7}, + ... {"name": "Old Faithful", "kind": "Quasar", "pos": 8}]) + +When the rows are already positional lists (e.g. data from a DataFrame), only +the SQL is rewritten: each ``%(name)s`` occurrence becomes the next ``$N``, so +the values in each row must follow the placeholder order, one per occurrence: + + >>> cursor.execute( + ... "INSERT INTO locations (name, kind, position) " + ... "VALUES (%(name)s, %(kind)s, %(pos)s)", + ... bulk_parameters=[['Cloverleaf', 'Quasar', 7], + ... ['Old Faithful', 'Quasar', 8]]) + .. 
_selects: Selecting data diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 7ac7f051..0e331223 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -22,6 +22,7 @@ import typing as t import warnings from datetime import datetime, timedelta, timezone +from itertools import count from .converter import Converter, DataType from .exceptions import ProgrammingError @@ -29,13 +30,19 @@ _NAMED_PARAM_RE = re.compile(r"%\(([^)]+)\)s") +def _rewrite_pyformat_sql(sql: str) -> str: + """Replace %(name)s placeholders with $N positional markers (1-indexed).""" + counter = count(1) + return _NAMED_PARAM_RE.sub(lambda _: f"${next(counter)}", sql) + + def _convert_named_to_positional( sql: str, params: t.Dict[str, t.Any] ) -> t.Tuple[str, t.List[t.Any]]: - """Convert pyformat-style named parameters to positional qmark parameters. + """Convert pyformat-style named parameters to positional parameters. - Converts ``%(name)s`` placeholders to ``?`` and returns an ordered list - of corresponding values extracted from ``params``. + Converts ``%(name)s`` placeholders to ``$N`` (1-indexed) and returns an + ordered list of corresponding values extracted from ``params``. The same name may appear multiple times; each occurrence appends the value to the positional list independently. @@ -47,7 +54,7 @@ def _convert_named_to_positional( sql = "SELECT * FROM t WHERE a = %(a)s AND b = %(b)s" params = {"a": 1, "b": 2} - # returns: ("SELECT * FROM t WHERE a = ? 
AND b = ?", [1, 2]) + # returns: ("SELECT * FROM t WHERE a = $1 AND b = $2", [1, 2]) """ positions = {} idx = 1 @@ -91,8 +98,8 @@ def _convert_named_bulk_params( for row in seq_of_dicts: if not isinstance(row, dict): raise ProgrammingError( - "executemany() requires all parameter rows to be dicts " - "when the SQL uses pyformat (%(name)s) placeholders" + "All bulk parameter rows must be dicts when SQL uses " + "pyformat (%(name)s) placeholders; got a non-dict row" ) positional: t.List[t.Any] = [None] * n for name, pos in positions.items(): @@ -136,6 +143,13 @@ def execute(self, sql, parameters=None, bulk_parameters=None): if isinstance(parameters, dict): sql, parameters = _convert_named_to_positional(sql, parameters) + elif bulk_parameters is not None and _NAMED_PARAM_RE.search(sql): + if bulk_parameters and isinstance(bulk_parameters[0], dict): + sql, bulk_parameters = _convert_named_bulk_params( + sql, bulk_parameters + ) + else: + sql = _rewrite_pyformat_sql(sql) self._result = self.connection.client.sql( sql, parameters, bulk_parameters diff --git a/tests/client/test_cursor.py b/tests/client/test_cursor.py index ace23c4a..6fb49c20 100644 --- a/tests/client/test_cursor.py +++ b/tests/client/test_cursor.py @@ -203,7 +203,9 @@ def test_executemany_with_mixed_param_types(mocked_connection): parameter sequence mixes dicts and non-dicts while the SQL uses pyformat. 
""" cursor = mocked_connection.cursor() - with pytest.raises(ProgrammingError, match="requires all parameter rows"): + with pytest.raises( + ProgrammingError, match="All bulk parameter rows must be dicts" + ): cursor.executemany( "INSERT INTO characters (name) VALUES (%(name)s)", [{"name": "Arthur"}, ["Trillian"]], # second row is a list @@ -329,6 +331,68 @@ def test_execute_with_bulk_args(mocked_connection): mocked_connection.client.sql.assert_called_once_with(statement, None, [[1]]) +def test_execute_with_pyformat_sql_and_bulk_parameters(mocked_connection): + """ + cursor.execute() converts %(name)s SQL to $N when bulk_parameters is + provided. Rows are already positional; only the SQL needs conversion. + """ + cursor = mocked_connection.cursor() + sql = "INSERT INTO t (id, val) VALUES (%(id)s, %(val)s)" + bulk = [[1, "hello"], [2, "world"]] + cursor.execute(sql, bulk_parameters=bulk) + mocked_connection.client.sql.assert_called_once_with( + "INSERT INTO t (id, val) VALUES ($1, $2)", None, bulk + ) + + +def test_execute_with_pyformat_sql_and_dict_bulk_parameters(mocked_connection): + """ + cursor.execute() with pyformat SQL and dict-format bulk_parameters converts + both the SQL template (%(x)s → $N) and the rows (dicts → positional lists). + """ + cursor = mocked_connection.cursor() + sql = "INSERT INTO t (id, val) VALUES (%(id)s, %(val)s)" + bulk = [{"id": 1, "val": "hello"}, {"id": 2, "val": "world"}] + cursor.execute(sql, bulk_parameters=bulk) + mocked_connection.client.sql.assert_called_once_with( + "INSERT INTO t (id, val) VALUES ($1, $2)", + None, + [[1, "hello"], [2, "world"]], + ) + + +def test_execute_with_dict_bulk_parameters_mixed_types_raises( + mocked_connection, +): + """ + cursor.execute() raises ProgrammingError when bulk_parameters mixes + dict and non-dict rows with pyformat SQL. 
+ """ + cursor = mocked_connection.cursor() + with pytest.raises( + ProgrammingError, match="All bulk parameter rows must be dicts" + ): + cursor.execute( + "INSERT INTO t (id) VALUES (%(id)s)", + bulk_parameters=[{"id": 1}, [2]], + ) + mocked_connection.client.sql.assert_not_called() + + +def test_execute_with_pyformat_sql_and_bulk_parameters_no_placeholders( + mocked_connection, +): + """ + SQL without %(name)s placeholders is passed through unchanged + even when bulk_parameters is provided. + """ + cursor = mocked_connection.cursor() + sql = "INSERT INTO t (id, val) VALUES (?, ?)" + bulk = [[1, "hello"], [2, "world"]] + cursor.execute(sql, bulk_parameters=bulk) + mocked_connection.client.sql.assert_called_once_with(sql, None, bulk) + + def test_execute_custom_converter(mocked_connection): """ Verify that a custom converter is correctly applied when passed to a cursor.