From 85c1cb677959dd43e4b14b7cffb430cc00ed87cf Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:13:11 -0400 Subject: [PATCH 01/15] fix(oracle): avoid double cursor closing by removing unnecessary `close` in `_fetch_from_cursor` (#9913) Previously we were trying to close a cursor after an exception was raised in `raw_sql`, which already closes the cursor in the case of an exception. This is not allowed by the oracledb driver, so just close the cursor on success. --- ibis/backends/__init__.py | 16 ++++++++------ ibis/backends/duckdb/tests/test_client.py | 21 ++++++++++++++++++ ibis/backends/oracle/__init__.py | 15 ++----------- ibis/backends/tests/test_api.py | 12 ++++++++++ ibis/backends/tests/test_temporal.py | 9 ++++---- ibis/common/caching.py | 27 +++++++++-------------- 6 files changed, 58 insertions(+), 42 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 5d6ff016ad20..bbbcb36d9bab 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -7,6 +7,7 @@ import keyword import re import urllib.parse +import weakref from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar @@ -34,6 +35,12 @@ class TablesAccessor(collections.abc.Mapping): """A mapping-like object for accessing tables off a backend. + ::: {.callout-note} + ## The `tables` accessor is tied to the lifetime of the backend. + + If the backend goes out of scope, the `tables` accessor is no longer valid. + ::: + Tables may be accessed by name using either index or attribute access: Examples @@ -804,12 +811,7 @@ def __init__(self, *args, **kwargs): self._con_args: tuple[Any] = args self._con_kwargs: dict[str, Any] = kwargs self._can_reconnect: bool = True - # expression cache - self._query_cache = RefCountedCache( - populate=self._load_into_cache, - lookup=lambda name: self.table(name).op(), - finalize=self._clean_up_cached_table, - ) + self._query_cache = RefCountedCache(weakref.proxy(self)) @property @abc.abstractmethod @@ -1017,7 +1019,7 @@ def tables(self): >>> people = con.tables.people # access via attribute """ - return TablesAccessor(self) + return TablesAccessor(weakref.proxy(self)) @property @abc.abstractmethod diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index c8b4a4e91183..94a0c028a302 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -1,5 +1,6 @@ from __future__ import annotations +import gc import os import subprocess import sys @@ -403,3 +404,23 @@ def test_read_csv_with_types(tmp_path, input, all_varchar): path.write_bytes(data) t = con.read_csv(path, all_varchar=all_varchar, **input) assert t.schema()["geom"].is_geospatial() + + +def test_tables_accessor_no_reference_cycle(): + """Test that a single reference to a connection has the desired lifetime semantics.""" + con = ibis.duckdb.connect() + + before = len(gc.get_referrers(con)) + tables = con.tables + after = len(gc.get_referrers(con)) + + assert after == before + + # valid call, and there are no tables in the database + assert not list(tables) + + del con + + # no longer valid because the backend has been manually decref'd + with pytest.raises(ReferenceError): + list(tables) diff --git a/ibis/backends/oracle/__init__.py b/ibis/backends/oracle/__init__.py index 9005f96c5b82..8e4c46fd51d6 100644 --- a/ibis/backends/oracle/__init__.py +++ b/ibis/backends/oracle/__init__.py @@ -623,19 +623,8 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame: from ibis.backends.oracle.converter import OraclePandasData - try: - df = pd.DataFrame.from_records( - cursor, columns=schema.names, coerce_float=True - ) - except Exception: - # clean up the cursor if we fail to create the DataFrame - # - # in the sqlite case failing to close the cursor results in - # artificially locked tables - cursor.close() - raise - df = OraclePandasData.convert_table(df, schema) - return df + df = pd.DataFrame.from_records(cursor, columns=schema.names, coerce_float=True) + return OraclePandasData.convert_table(df, schema) def _clean_up_tmp_table(self, name: str) -> None: with self.begin() as bind: diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 4b71bdb4ffee..074c288933da 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -1,5 +1,7 @@ from __future__ import annotations +import gc + import pytest from pytest import param @@ -115,6 +117,16 @@ def test_tables_accessor_repr(con): assert f"- {name}" in result +def test_tables_accessor_no_reference_cycle(con): + before = len(gc.get_referrers(con)) + _ = con.tables + after = len(gc.get_referrers(con)) + + # assert that creating a `tables` accessor object doesn't increase the + # number of strong references + assert after == before + + @pytest.mark.parametrize( "expr_fn", [ diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 1c9ba8f027eb..32334e8f587c 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -29,7 +29,6 @@ MySQLOperationalError, MySQLProgrammingError, OracleDatabaseError, - OracleInterfaceError, PolarsInvalidOperationError, PolarsPanicException, PsycoPg2InternalError, @@ -505,8 +504,8 @@ def test_date_truncate(backend, alltypes, df, unit): ), pytest.mark.notyet( ["oracle"], - raises=OracleInterfaceError, - reason="cursor not open, probably a bug in the sql generated", + raises=OracleDatabaseError, + reason="ORA-01839: date not valid for month specified", ), sqlite_without_ymd_intervals, ], @@ -633,8 +632,8 @@ def convert_to_offset(offset, displacement_type=displacement_type): ), pytest.mark.notyet( ["oracle"], - raises=OracleInterfaceError, - reason="cursor not open, probably a bug in the sql generated", + raises=OracleDatabaseError, + reason="ORA-01839: date not valid for month specified", ), sqlite_without_ymd_intervals, ], diff --git a/ibis/common/caching.py b/ibis/common/caching.py index 006f57231059..14e1e569b50d 100644 --- a/ibis/common/caching.py +++ b/ibis/common/caching.py @@ -2,9 +2,9 @@ import functools import sys +import weakref from collections import namedtuple from typing import TYPE_CHECKING, Any -from weakref import finalize, ref if TYPE_CHECKING: from collections.abc import Callable @@ -39,17 +39,8 @@ class RefCountedCache: We can implement that interface if and when we need to. """ - def __init__( - self, - *, - populate: Callable[[str, Any], None], - lookup: Callable[[str], Any], - finalize: Callable[[Any], None], - ) -> None: - self.populate = populate - self.lookup = lookup - self.finalize = finalize - + def __init__(self, backend: weakref.proxy) -> None: + self.backend = backend self.cache: dict[Any, CacheEntry] = dict() def get(self, key, default=None): @@ -70,11 +61,13 @@ def store(self, input): key = input.op() name = gen_name("cache") - self.populate(name, input) - cached = self.lookup(name) - finalizer = finalize(cached, self._release, key) - self.cache[key] = CacheEntry(name, ref(cached), finalizer) + self.backend._load_into_cache(name, input) + + cached = self.backend.table(name).op() + finalizer = weakref.finalize(cached, self._release, key) + + self.cache[key] = CacheEntry(name, weakref.ref(cached), finalizer) return cached @@ -88,7 +81,7 @@ def release(self, name: str) -> None: def _release(self, key) -> None: entry = self.cache.pop(key) try: - self.finalize(entry.name) + self.backend._clean_up_cached_table(entry.name) except Exception: # suppress exceptions during interpreter shutdown if not sys.is_finalizing(): From a718ff36951e1426ba5e41195aa54bfd92a84c3f Mon Sep 17 00:00:00 2001 From: ncclementi Date: Wed, 24 Jul 2024 11:51:31 -0400 Subject: [PATCH 02/15] refactor(api): duckdb ddl accessor implementation --- ibis/backends/__init__.py | 232 ++++++++++++++---- ibis/backends/duckdb/__init__.py | 139 ++++++----- ibis/backends/duckdb/tests/test_catalog.py | 12 +- ibis/backends/duckdb/tests/test_client.py | 35 +-- ibis/backends/duckdb/tests/test_geospatial.py | 5 +- ibis/backends/duckdb/tests/test_register.py | 4 +- ibis/backends/tests/test_api.py | 82 +++++-- ibis/backends/tests/test_client.py | 12 +- ibis/backends/tests/test_expr_caching.py | 4 +- ibis/backends/tests/test_register.py | 6 +- ibis/common/caching.py | 27 +- ibis/examples/tests/test_examples.py | 2 +- 12 files changed, 368 insertions(+), 192 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index bbbcb36d9bab..4c3524ba8b09 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -7,7 +7,6 @@ import keyword import re import urllib.parse -import weakref from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar @@ -35,12 +34,6 @@ class TablesAccessor(collections.abc.Mapping): """A mapping-like object for accessing tables off a backend. - ::: {.callout-note} - ## The `tables` accessor is tied to the lifetime of the backend. - - If the backend goes out of scope, the `tables` accessor is no longer valid. - ::: - Tables may be accessed by name using either index or attribute access: Examples @@ -53,6 +46,42 @@ class TablesAccessor(collections.abc.Mapping): def __init__(self, backend: BaseBackend) -> None: self._backend = backend + def _execute_if_exists( + self, method_name: str, database=None, like=None + ) -> list[str]: + """Executes method if it exists and it doesn't raise a NotImplementedError, else returns an empty list.""" + method = getattr(self._backend.ddl, method_name) + if callable(method): + try: + return method(database=database, like=like) + except NotImplementedError: + pass + return [] + + def _gather_tables(self, database=None, like=None) -> list[str]: + """Gathers table names using the list_* methods available on the backend.""" + # TODO: break this down into views/tables to be more explicit in repr (see #9859) + # list_* methods that might exist on a given backends. + list_methods = [ + "list_tables", + "list_temp_tables", + "list_views", + "list_temp_views", + ] + tables = [] + for method_name in list_methods: + tables.extend( + self._execute_if_exists(method_name, database=database, like=like) + ) + return list(set(tables)) + + def __call__(self, database=None, like=None): + return self._gather_tables(database, like) + + @property + def _tables(self) -> list[str]: + return self._gather_tables() + def __getitem__(self, name) -> ir.Table: try: return self._backend.table(name) @@ -68,29 +97,154 @@ def __getattr__(self, name) -> ir.Table: raise AttributeError(name) from exc def __iter__(self) -> Iterator[str]: - return iter(sorted(self._backend.list_tables())) + return iter(sorted(self._tables)) def __len__(self) -> int: - return len(self._backend.list_tables()) + return len(self._tables) def __dir__(self) -> list[str]: o = set() o.update(dir(type(self))) o.update( name - for name in self._backend.list_tables() + for name in self._tables if name.isidentifier() and not keyword.iskeyword(name) ) return list(o) def __repr__(self) -> str: - tables = self._backend.list_tables() rows = ["Tables", "------"] - rows.extend(f"- {name}" for name in sorted(tables)) + rows.extend(f"- {name}" for name in sorted(self._tables)) return "\n".join(rows) def _ipython_key_completions_(self) -> list[str]: - return self._backend.list_tables() + return self._tables + + +class DDLAccessor: + """ddl accessor list views.""" + + def __init__(self, backend: BaseBackend) -> None: + self._backend = backend + + def _raise_if_not_implemented(self, method_name: str): + method = getattr(self._backend, method_name) + if not callable(method): + raise NotImplementedError( + f"The method {method_name} is not implemented for the {self._backend.name} backend" + ) + + def list_tables( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + """Return the list of table names in a database via the backend's implementation. + + ::: {.callout-note} + ## Ibis does not use the word `schema` to refer to database hierarchy. + + A collection of tables is referred to as a `database`. + A collection of `database` is referred to as a `catalog`. + + These terms are mapped onto the corresponding features in each + backend (where available), regardless of whether the backend itself + uses the same terminology. + ::: + + Parameters + ---------- + like + A pattern to use for listing tables. + database + Database to list tables from. Default behavior is to show tables in + the current database. + """ + + self._raise_if_not_implemented("_list_tables") + return self._backend._list_tables(like=like, database=database) + + def list_temp_tables( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + """Return the list of temporary table names in a database via the backend's implementation. + + ::: {.callout-note} + ## Ibis does not use the word `schema` to refer to database hierarchy. + + A collection of tables is referred to as a `database`. + A collection of `database` is referred to as a `catalog`. + + These terms are mapped onto the corresponding features in each + backend (where available), regardless of whether the backend itself + uses the same terminology. + ::: + + Parameters + ---------- + like + A pattern to use for listing tables. + database + Database to list tables from. Default behavior is to show tables in + the current database. + """ + + self._raise_if_not_implemented("_list_temp_tables") + return self._backend._list_temp_tables(like=like, database=database) + + def list_views( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + """Return the list of view names in a database via the backend's implementation. + + ::: {.callout-note} + ## Ibis does not use the word `schema` to refer to database hierarchy. + + A collection of tables is referred to as a `database`. + A collection of `database` is referred to as a `catalog`. + + These terms are mapped onto the corresponding features in each + backend (where available), regardless of whether the backend itself + uses the same terminology. + ::: + + Parameters + ---------- + like + A pattern to use for listing tables. + database + Database to list tables from. Default behavior is to show tables in + the current database. + """ + + self._raise_if_not_implemented("_list_views") + return self._backend._list_views(like=like, database=database) + + def list_temp_views( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + """Return the list of temporary view names in a database via the backend's implementation. + + ::: {.callout-note} + ## Ibis does not use the word `schema` to refer to database hierarchy. + + A collection of tables is referred to as a `database`. + A collection of `database` is referred to as a `catalog`. + + These terms are mapped onto the corresponding features in each + backend (where available), regardless of whether the backend itself + uses the same terminology. + ::: + + Parameters + ---------- + like + A pattern to use for listing tables. + database + Database to list tables from. Default behavior is to show tables in + the current database. + """ + + self._raise_if_not_implemented("_list_temp_views") + return self._backend._list_temp_views(like=like, database=database) class _FileIOHandler: @@ -811,7 +965,12 @@ def __init__(self, *args, **kwargs): self._con_args: tuple[Any] = args self._con_kwargs: dict[str, Any] = kwargs self._can_reconnect: bool = True - self._query_cache = RefCountedCache(weakref.proxy(self)) + # expression cache + self._query_cache = RefCountedCache( + populate=self._load_into_cache, + lookup=lambda name: self.table(name).op(), + finalize=self._clean_up_cached_table, + ) @property @abc.abstractmethod @@ -933,44 +1092,6 @@ def _filter_with_like(values: Iterable[str], like: str | None = None) -> list[st pattern = re.compile(like) return sorted(filter(pattern.findall, values)) - @abc.abstractmethod - def list_tables( - self, like: str | None = None, database: tuple[str, str] | str | None = None - ) -> list[str]: - """Return the list of table names in the current database. - - For some backends, the tables may be files in a directory, - or other equivalent entities in a SQL database. - - ::: {.callout-note} - ## Ibis does not use the word `schema` to refer to database hierarchy. - - A collection of tables is referred to as a `database`. - A collection of `database` is referred to as a `catalog`. - - These terms are mapped onto the corresponding features in each - backend (where available), regardless of whether the backend itself - uses the same terminology. - ::: - - Parameters - ---------- - like - A pattern in Python's regex format. - database - The database from which to list tables. - If not provided, the current database is used. - For backends that support multi-level table hierarchies, you can - pass in a dotted string path like `"catalog.database"` or a tuple of - strings like `("catalog", "database")`. - - Returns - ------- - list[str] - The list of the table names that match the pattern `like`. - - """ - @abc.abstractmethod def table( self, name: str, database: tuple[str, str] | str | None = None @@ -1019,7 +1140,12 @@ def tables(self): >>> people = con.tables.people # access via attribute """ - return TablesAccessor(weakref.proxy(self)) + return TablesAccessor(self) + + @property + def ddl(self): + """A ddl accessor.""" + return DDLAccessor(self) @property @abc.abstractmethod diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 13890d1b5d01..dbe402e90b51 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -977,85 +977,102 @@ def read_delta( self.con.register(table_name, delta_table.to_pyarrow_dataset()) return self.table(table_name) - def list_tables( + def _list_query_constructor(self, col: str, where_predicates: list) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col) + .from_(sg.table("tables", db="information_schema")) + .where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + is_temp: bool = False, + ) -> list[str]: + """Generic method to list objects like tables or views.""" + + table_loc = self._warn_and_create_table_loc(database) + + catalog = table_loc.catalog or ("temp" if is_temp else self.current_catalog) + database = table_loc.db or self.current_database + + col = "table_name" + where_predicates = [ + C.table_catalog.eq(sge.convert(catalog)), + C.table_schema.eq(sge.convert(database)), + C.table_type.eq(object_type), + ] + + sql = self._list_query_constructor(col, where_predicates) + out = self.con.execute(sql).fetch_arrow_table() + + return self._filter_with_like(out[col].to_pylist(), like) + + def _list_tables( self, like: str | None = None, database: tuple[str, str] | str | None = None, - schema: str | None = None, ) -> list[str]: - """List tables and views. + """List physical tables.""" - ::: {.callout-note} - ## Ibis does not use the word `schema` to refer to database hierarchy. + return self._list_objects(like, database, "BASE TABLE") - A collection of tables is referred to as a `database`. - A collection of `database` is referred to as a `catalog`. + def _list_temp_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary tables.""" - These terms are mapped onto the corresponding features in each - backend (where available), regardless of whether the backend itself - uses the same terminology. - ::: + return self._list_objects(like, database, "LOCAL TEMPORARY", is_temp=True) - Parameters - ---------- - like - Regex to filter by table/view name. - database - Database location. If not passed, uses the current database. + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" - By default uses the current `database` (`self.current_database`) and - `catalog` (`self.current_catalog`). + return self._list_objects(like, database, "VIEW") - To specify a table in a separate catalog, you can pass in the - catalog and database as a string `"catalog.database"`, or as a tuple of - strings `("catalog", "database")`. - schema - [deprecated] Schema name. If not passed, uses the current schema. + def _list_temp_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary views.""" - Returns - ------- - list[str] - List of table and view names. + return self._list_objects(like, database, "VIEW", is_temp=True) - Examples - -------- - >>> import ibis - >>> con = ibis.duckdb.connect() - >>> foo = con.create_table("foo", schema=ibis.schema(dict(a="int"))) - >>> con.list_tables() - ['foo'] - >>> bar = con.create_view("bar", foo) - >>> con.list_tables() - ['bar', 'foo'] - >>> con.create_database("my_database") - >>> con.list_tables(database="my_database") - [] - >>> with con.begin() as c: - ... c.exec_driver_sql("CREATE TABLE my_database.baz (a INTEGER)") # doctest: +ELLIPSIS - <...> - >>> con.list_tables(database="my_database") - ['baz'] + @deprecated(as_of="10.0", instead="use the con.tables") + def list_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + schema: str | None = None, + ) -> list[str]: + """List tables and views.""" - """ table_loc = self._warn_and_create_table_loc(database, schema) - catalog = table_loc.catalog or self.current_catalog - database = table_loc.db or self.current_database + database = self.current_database + if table_loc is not None: + database = table_loc.db or database - col = "table_name" - sql = ( - sg.select(col) - .from_(sg.table("tables", db="information_schema")) - .distinct() - .where( - C.table_catalog.isin(sge.convert(catalog), sge.convert("temp")), - C.table_schema.eq(sge.convert(database)), - ) - .sql(self.dialect) + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_temp_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) + | set(self._list_temp_views(like=like, database=database)) ) - out = self.con.execute(sql).fetch_arrow_table() - return self._filter_with_like(out[col].to_pylist(), like) + return tables_and_views def read_postgres( self, uri: str, *, table_name: str | None = None, database: str = "public" diff --git a/ibis/backends/duckdb/tests/test_catalog.py b/ibis/backends/duckdb/tests/test_catalog.py index a59c04ee3743..a5a96d99b81a 100644 --- a/ibis/backends/duckdb/tests/test_catalog.py +++ b/ibis/backends/duckdb/tests/test_catalog.py @@ -36,8 +36,8 @@ def test_read_write_external_catalog(con, external_duckdb_file, monkeypatch): assert "ext" in con.list_catalogs() assert "main" in con.list_databases(catalog="ext") - assert "starwars" in con.list_tables(database="ext.main") - assert "starwars" not in con.list_tables() + assert "starwars" in con.ddl.list_tables(database="ext.main") + assert "starwars" not in con.ddl.list_tables() starwars = con.table("starwars", database="ext.main") tm.assert_frame_equal(starwars.to_pandas(), starwars_df) @@ -47,8 +47,8 @@ def test_read_write_external_catalog(con, external_duckdb_file, monkeypatch): _ = con.create_table("t2", obj=t, database="ext.main") - assert "t2" in con.list_tables(database="ext.main") - assert "t2" not in con.list_tables() + assert "t2" in con.ddl.list_tables(database="ext.main") + assert "t2" not in con.ddl.list_tables() table = con.table("t2", database="ext.main") @@ -60,8 +60,8 @@ def test_read_write_external_catalog(con, external_duckdb_file, monkeypatch): _ = con.create_table("t2", obj=t_overwrite, database="ext.main", overwrite=True) - assert "t2" in con.list_tables(database="ext.main") - assert "t2" not in con.list_tables() + assert "t2" in con.ddl.list_tables(database="ext.main") + assert "t2" not in con.ddl.list_tables() table = con.table("t2", database="ext.main") diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index 94a0c028a302..55dc6296cdbc 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -1,6 +1,5 @@ from __future__ import annotations -import gc import os import subprocess import sys @@ -293,15 +292,15 @@ def test_list_tables_schema_warning_refactor(con): "diamonds", "functional_alltypes", "win", - }.issubset(con.list_tables()) + }.issubset(con.tables) icecream_table = ["ice_cream"] with pytest.warns(FutureWarning): assert con.list_tables(schema="shops") == icecream_table - assert con.list_tables(database="shops") == icecream_table - assert con.list_tables(database=("shops",)) == icecream_table + assert con.ddl.list_tables(database="shops") == icecream_table + assert con.ddl.list_tables(database=("shops",)) == icecream_table def test_settings_repr(): @@ -315,16 +314,16 @@ def test_connect_named_in_memory_db(): con_named_db = ibis.duckdb.connect(":memory:mydb") con_named_db.create_table("ork", schema=ibis.schema(dict(bork="int32"))) - assert "ork" in con_named_db.list_tables() + assert "ork" in con_named_db.tables con_named_db_2 = ibis.duckdb.connect(":memory:mydb") - assert "ork" in con_named_db_2.list_tables() + assert "ork" in con_named_db_2.tables unnamed_memory_db = ibis.duckdb.connect(":memory:") - assert "ork" not in unnamed_memory_db.list_tables() + assert "ork" not in unnamed_memory_db.tables default_memory_db = ibis.duckdb.connect() - assert "ork" not in default_memory_db.list_tables() + assert "ork" not in default_memory_db.tables @pytest.mark.parametrize( @@ -404,23 +403,3 @@ def test_read_csv_with_types(tmp_path, input, all_varchar): path.write_bytes(data) t = con.read_csv(path, all_varchar=all_varchar, **input) assert t.schema()["geom"].is_geospatial() - - -def test_tables_accessor_no_reference_cycle(): - """Test that a single reference to a connection has the desired lifetime semantics.""" - con = ibis.duckdb.connect() - - before = len(gc.get_referrers(con)) - tables = con.tables - after = len(gc.get_referrers(con)) - - assert after == before - - # valid call, and there are no tables in the database - assert not list(tables) - - del con - - # no longer valid because the backend has been manually decref'd - with pytest.raises(ReferenceError): - list(tables) diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index 2c748c1d9f48..d62004751c61 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -263,12 +263,9 @@ def test_geospatial_flip_coordinates(geotable): def test_create_table_geospatial_types(geotable, con): name = ibis.util.gen_name("geotable") - - # con = ibis.get_backend(geotable) - t = con.create_table(name, geotable, temp=True) - assert t.op().name in con.list_tables() + assert t.op().name in con.tables assert any(map(methodcaller("is_geospatial"), t.schema().values())) diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 5f4a79564bfa..93e008d554c0 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -292,7 +292,7 @@ def test_attach_sqlite(data_dir, tmp_path): con = ibis.duckdb.connect() con.attach_sqlite(test_db_path) - assert set(con.list_tables()) >= { + assert set(con.tables) >= { "functional_alltypes", "awards_players", "batting", @@ -304,7 +304,7 @@ def test_attach_sqlite(data_dir, tmp_path): # overwrite existing sqlite_db and force schema to all strings con.attach_sqlite(test_db_path, overwrite=True, all_varchar=True) - assert set(con.list_tables()) >= { + assert set(con.tables) >= { "functional_alltypes", "awards_players", "batting", diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 074c288933da..8d93eca617ce 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -1,12 +1,13 @@ from __future__ import annotations -import gc - import pytest +import sqlglot as sg +import sqlglot.expressions as sge from pytest import param import ibis.expr.types as ir from ibis.backends.conftest import TEST_TABLES +from ibis.backends.sql.compilers.base import STAR from ibis.backends.tests.errors import PyDruidProgrammingError @@ -61,9 +62,9 @@ def test_catalog_consistency(backend, con): assert current_catalog in catalogs -def test_list_tables(con): - tables = con.list_tables() - assert isinstance(tables, list) +def test_list_all_tables_and_views(con): + tables = con.tables + # only table that is guaranteed to be in all backends key = "functional_alltypes" assert key in tables or key.upper() in tables @@ -84,7 +85,7 @@ def test_tables_accessor_mapping(con): # temporary might pop into existence in parallel test runs, in between the # first `list_tables` call and the second, so we check that there's a # non-empty intersection - assert TEST_TABLES.keys() & set(map(str.lower, con.list_tables())) + assert TEST_TABLES.keys() & set(map(str.lower, con.ddl.list_tables())) assert TEST_TABLES.keys() & set(map(str.lower, con.tables)) @@ -117,16 +118,6 @@ def test_tables_accessor_repr(con): assert f"- {name}" in result -def test_tables_accessor_no_reference_cycle(con): - before = len(gc.get_referrers(con)) - _ = con.tables - after = len(gc.get_referrers(con)) - - # assert that creating a `tables` accessor object doesn't increase the - # number of strong references - assert after == before - - @pytest.mark.parametrize( "expr_fn", [ @@ -154,3 +145,62 @@ def test_unbind(alltypes, expr_fn): assert "Unbound" not in repr(expr) assert "Unbound" in repr(expr.unbind()) + + +## works on duckdb only for now +def test_list_tables(ddl_con): + # should check only physical tables + table_name = "functional_alltypes" + tables = ddl_con.ddl.list_tables() + assert isinstance(tables, list) + assert table_name in tables + + assert table_name not in ddl_con.ddl.list_views() + assert table_name not in ddl_con.ddl.list_temp_tables() + assert table_name not in ddl_con.ddl.list_temp_views() + + +def test_list_views(ddl_con, temp_view): + # temp_view: view name + expr = ddl_con.table("functional_alltypes") + ddl_con.create_view(temp_view, expr) + + views = ddl_con.ddl.list_views() + + assert isinstance(views, list) + assert temp_view in views + assert temp_view not in ddl_con.ddl.list_tables() + assert temp_view not in ddl_con.ddl.list_temp_tables() + assert temp_view not in ddl_con.ddl.list_temp_views() + + +def test_list_temp_tables(ddl_con): + expr = ddl_con.table("functional_alltypes") + temp_table_name = "all_types_temp" + ddl_con.create_table(temp_table_name, expr, temp=True) + temp_tables = ddl_con.ddl.list_temp_tables() + + assert isinstance(temp_tables, list) + assert temp_table_name in temp_tables + assert temp_table_name not in ddl_con.ddl.list_views() + assert temp_table_name not in ddl_con.ddl.list_temp_views() + assert temp_table_name not in ddl_con.ddl.list_tables() + + +def test_list_temp_views(ddl_con): + # TODO: replace raw_sql with create_temp + + temp_view = sge.Create( + this=sg.table("temp_view_example"), + kind="VIEW", + expression=sg.select(STAR).from_(sg.table("functional_alltypes")), + properties=sge.Properties(expressions=[sge.TemporaryProperty()]), + ) + ddl_con.raw_sql(temp_view.sql(ddl_con.dialect)) + temporary_views = ddl_con.ddl.list_temp_views() + + assert isinstance(temporary_views, list) + assert "temp_view_example" in temporary_views + assert "temp_view_example" not in ddl_con.ddl.list_tables() + assert "temp_view_example" not in ddl_con.ddl.list_views() + assert "temp_view_example" not in ddl_con.ddl.list_temp_tables() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 432f0be2836b..a56629cbb5f5 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -329,7 +329,7 @@ def test_create_temporary_table_from_schema(con_no_data, new_schema): con_no_data.reconnect() # verify table no longer exist after reconnect - assert temp_table not in con_no_data.list_tables() + assert temp_table not in con_no_data.tables @mark.notimpl( @@ -397,7 +397,7 @@ def test_nullable_input_output(con, temp_table): def test_create_drop_view(ddl_con, temp_view): # setup table_name = "functional_alltypes" - tables = ddl_con.list_tables() + tables = ddl_con.tables if table_name in tables or (table_name := table_name.upper()) in tables: expr = ddl_con.table(table_name) @@ -409,7 +409,7 @@ def test_create_drop_view(ddl_con, temp_view): # create a new view ddl_con.create_view(temp_view, expr) # check if the view was created - assert temp_view in ddl_con.list_tables() + assert temp_view in ddl_con.ddl.list_views() t_expr = ddl_con.table(table_name) v_expr = ddl_con.table(temp_view) @@ -452,7 +452,7 @@ def employee_data_1_temp_table(backend, con, test_employee_schema): _create_temp_table_with_schema( backend, con, temp_table_name, test_employee_schema, data=test_employee_data_1 ) - assert temp_table_name in con.list_tables() + assert temp_table_name in con.tables yield temp_table_name con.drop_table(temp_table_name, force=True) @@ -725,7 +725,7 @@ def test_unsigned_integer_type(con, temp_table): schema=ibis.schema(dict(a="uint8", b="uint16", c="uint32", d="uint64")), overwrite=True, ) - assert temp_table in con.list_tables() + assert temp_table in con.tables @pytest.mark.backend @@ -1047,7 +1047,7 @@ def test_create_table_in_memory(con, obj, table_name, monkeypatch): t = con.create_table(table_name, obj) result = pa.table({"a": ["a"], "b": [1]}) - assert table_name in con.list_tables() + assert table_name in con.tables assert result.equals(t.to_pyarrow()) diff --git a/ibis/backends/tests/test_expr_caching.py b/ibis/backends/tests/test_expr_caching.py index d2623b0a593d..84fb19618091 100644 --- a/ibis/backends/tests/test_expr_caching.py +++ b/ibis/backends/tests/test_expr_caching.py @@ -100,7 +100,7 @@ def test_persist_expression_multiple_refs(backend, con, alltypes): assert op not in con._query_cache.cache # assert that table has been dropped - assert name not in con.list_tables() + assert name not in con.tables @mark.notimpl(["datafusion", "flink", "impala", "trino", "druid"]) @@ -126,7 +126,7 @@ def test_persist_expression_repeated_cache(alltypes, con): del nested_cached_table, cached_table - assert name not in con.list_tables() + assert name not in con.tables @mark.notimpl(["datafusion", "flink", "impala", "trino", "druid"]) diff --git a/ibis/backends/tests/test_register.py b/ibis/backends/tests/test_register.py index cdfa1683743f..7b8801bc8485 100644 --- a/ibis/backends/tests/test_register.py +++ b/ibis/backends/tests/test_register.py @@ -103,7 +103,7 @@ def test_register_csv(con, data_dir, fname, in_table_name, out_table_name): with pytest.warns(FutureWarning, match="v9.1"): table = con.register(fname, table_name=in_table_name) - assert any(out_table_name in t for t in con.list_tables()) + assert any(out_table_name in t for t in con.tables) if con.name != "datafusion": table.count().execute() @@ -226,7 +226,7 @@ def test_register_parquet( with pytest.warns(FutureWarning, match="v9.1"): table = con.register(f"parquet://{fname.name}", table_name=in_table_name) - assert any(out_table_name in t for t in con.list_tables()) + assert any(out_table_name in t for t in con.tables) if con.name != "datafusion": table.count().execute() @@ -273,7 +273,7 @@ def test_register_iterator_parquet( table_name=None, ) - assert any("ibis_read_parquet" in t for t in con.list_tables()) + assert any("ibis_read_parquet" in t for t in con.tables) assert table.count().execute() diff --git a/ibis/common/caching.py b/ibis/common/caching.py index 14e1e569b50d..006f57231059 100644 --- a/ibis/common/caching.py +++ b/ibis/common/caching.py @@ -2,9 +2,9 @@ import functools import sys -import weakref from collections import namedtuple from typing import TYPE_CHECKING, Any +from weakref import finalize, ref if TYPE_CHECKING: from collections.abc import Callable @@ -39,8 +39,17 @@ class RefCountedCache: We can implement that interface if and when we need to. """ - def __init__(self, backend: weakref.proxy) -> None: - self.backend = backend + def __init__( + self, + *, + populate: Callable[[str, Any], None], + lookup: Callable[[str], Any], + finalize: Callable[[Any], None], + ) -> None: + self.populate = populate + self.lookup = lookup + self.finalize = finalize + self.cache: dict[Any, CacheEntry] = dict() def get(self, key, default=None): @@ -61,13 +70,11 @@ def store(self, input): key = input.op() name = gen_name("cache") + self.populate(name, input) + cached = self.lookup(name) + finalizer = finalize(cached, self._release, key) - self.backend._load_into_cache(name, input) - - cached = self.backend.table(name).op() - finalizer = weakref.finalize(cached, self._release, key) - - self.cache[key] = CacheEntry(name, weakref.ref(cached), finalizer) + self.cache[key] = CacheEntry(name, ref(cached), finalizer) return cached @@ -81,7 +88,7 @@ def release(self, name: str) -> None: def _release(self, key) -> None: entry = self.cache.pop(key) try: - self.backend._clean_up_cached_table(entry.name) + self.finalize(entry.name) except Exception: # suppress exceptions during interpreter shutdown if not sys.is_finalizing(): diff --git a/ibis/examples/tests/test_examples.py b/ibis/examples/tests/test_examples.py index 40d1e2bf4afe..a2eeee4affd0 100644 --- a/ibis/examples/tests/test_examples.py +++ b/ibis/examples/tests/test_examples.py @@ -84,7 +84,7 @@ def test_non_example(): def test_backend_arg(): con = ibis.duckdb.connect() t = ibis.examples.penguins.fetch(backend=con) - assert t.get_name() in con.list_tables() + assert t.get_name() in con.tables @pytest.mark.duckdb From e3542485c2e1b1087ae1a2c0b6f6bbafeaed3f84 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Wed, 28 Aug 2024 17:13:31 -0400 Subject: [PATCH 03/15] refactor(api): postgres ddl accessor implementation --- ibis/backends/__init__.py | 18 ++ ibis/backends/postgres/__init__.py | 238 +++++++++++++------- ibis/backends/postgres/tests/test_client.py | 34 ++- ibis/backends/tests/test_api.py | 2 +- 4 files changed, 208 insertions(+), 84 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 4c3524ba8b09..73bd501a86fb 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -67,6 +67,7 @@ def _gather_tables(self, database=None, like=None) -> list[str]: "list_temp_tables", "list_views", "list_temp_views", + "list_foreign_tables", ] tables = [] for method_name in list_methods: @@ -246,6 +247,23 @@ def list_temp_views( self._raise_if_not_implemented("_list_temp_views") return self._backend._list_temp_views(like=like, database=database) + def list_foreign_tables( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + """Return the list of foreign table names via the backend's implementation. + + Parameters + ---------- + like + A pattern to use for listing tables. + database + Database to list foreign tables from. Default behavior is to show tables in + the current database. + """ + + self._raise_if_not_implemented("_list_foreign_tables") + return self._backend._list_foreign_tables(like=like, database=database) + class _FileIOHandler: @staticmethod diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index eed7c5eb86c9..38c7eb3fcaba 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -23,7 +23,8 @@ from ibis import util from ibis.backends import CanCreateDatabase, CanCreateSchema, CanListCatalog from ibis.backends.sql import SQLBackend -from ibis.backends.sql.compilers.base import TRUE, C, ColGen, F +from ibis.backends.sql.compilers.base import C, ColGen, F +from ibis.util import deprecated if TYPE_CHECKING: from collections.abc import Callable @@ -100,7 +101,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: ) # only register if we haven't already done so - if (name := op.name) not in self.list_tables(): + if (name := op.name) not in self.tables: quoted = self.compiler.quoted column_defs = [ sg.exp.ColumnDef( @@ -305,102 +306,183 @@ def _session_temp_db(self) -> str | None: return res[0] return res - def list_tables( + def _list_query_constructor( + self, col: str, where_predicates: list, tov: str + ) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col) + .from_(sg.table(tov, db="information_schema")) + .where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + is_temp: bool = False, + ) -> list[str]: + """Generic method to list objects like tables, temporary tables or views. + + (not used for temporary views) + """ + + table_loc = self._warn_and_create_table_loc(database) + + catalog = table_loc.catalog or self.current_catalog + # temporary tables and views are in a separate postgres schema + # that are of the form pg_temp_* where * is a number + database = table_loc.db or ("pg_temp_%" if is_temp else self.current_database) + col = "table_name" + + where_predicates = [ + C.table_catalog.eq(sge.convert(catalog)), + C.table_schema.like(sge.convert(database)) + if is_temp + else C.table_schema.eq(sge.convert(database)), + C.table_type.eq(object_type), + ] + + sql = self._list_query_constructor(col, where_predicates, tov="tables") + + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + return self._filter_with_like(map(itemgetter(0), out), like) + + def _list_tables( self, like: str | None = None, - schema: str | None = None, database: tuple[str, str] | str | None = None, ) -> list[str]: - """List the tables in the database. + """List physical tables.""" - ::: {.callout-note} - ## Ibis does not use the word `schema` to refer to database hierarchy. + return self._list_objects(like, database, "BASE TABLE") - A collection of tables is referred to as a `database`. - A collection of `database` is referred to as a `catalog`. + def _list_temp_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary tables.""" - These terms are mapped onto the corresponding features in each - backend (where available), regardless of whether the backend itself - uses the same terminology. - ::: + # Avoid the problem of having temp views showing on the + # when listing table_type "LOCAL TEMPORARY" in information schema table - Parameters - ---------- - like - A pattern to use for listing tables. - schema - [deprecated] The schema to perform the list against. - database - Database to list tables from. Default behavior is to show tables in - the current database. - """ - if schema is not None: - self._warn_schema() - - if schema is not None and database is not None: - raise ValueError( - "Using both the `schema` and `database` kwargs is not supported. " - "`schema` is deprecated and will be removed in Ibis 10.0" - "\nUse the `database` kwarg with one of the following patterns:" - '\ndatabase="database"' - '\ndatabase=("catalog", "database")' - '\ndatabase="catalog.database"', - ) - elif schema is not None: - table_loc = schema - elif database is not None: - table_loc = database - else: - table_loc = (self.current_catalog, self.current_database) - - table_loc = self._to_sqlglot_table(table_loc) - - conditions = [TRUE] - - if (db := table_loc.args["db"]) is not None: - db.args["quoted"] = False - db = db.sql(dialect=self.name) - conditions.append(C.table_schema.eq(sge.convert(db))) - if (catalog := table_loc.args["catalog"]) is not None: - catalog.args["quoted"] = False - catalog = catalog.sql(dialect=self.name) - conditions.append(C.table_catalog.eq(sge.convert(catalog))) - - sql = ( - sg.select("table_name") - .from_(sg.table("tables", db="information_schema")) - .distinct() - .where(*conditions) - .sql(self.dialect) + temp_tables_and_views = set( + self._list_objects(like, database, "LOCAL TEMPORARY", is_temp=True) ) + temp_views_only = set(self._list_temp_views(like, database)) + + # I don't like this ^ but I the only other way I found of doing it is + # looking into pg_class where relkind is 'r' + # and relnamespace is in the pg_namespace table where we can search for + # nspname like 'pg_temp_%' (this involves two different tables, not sure is a good idea) + # the sql is + # SELECT + # relname AS table_name + # FROM + # pg_class + # WHERE + # relkind = 'r' -- 'r' stands for ordinary tables + # AND relnamespace IN ( + # SELECT oid + # FROM pg_namespace + # WHERE nspname LIKE 'pg_temp_%' + # ); + + return list(temp_tables_and_views - temp_views_only) + + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" + + return self._list_objects(like, database, "VIEW") + + def _list_temp_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary views.""" + + table_loc = self._warn_and_create_table_loc(database) + + catalog = table_loc.catalog or self.current_catalog + # temporary tables and views are in a separate postgres table_schema that + # are of the form pg_temp_* where * is a number + database = table_loc.db or "pg_temp_%" + col = "table_name" + + # information_schema.views doesn't have a table_type to check + # we guarantee it's a temp view because the table_schema is pg_temp_* and + # we are checking only for views + where_predicates = [ + C.table_catalog.eq(sge.convert(catalog)), + C.table_schema.like(sge.convert(database)), + ] + + sql = self._list_query_constructor(col, where_predicates, tov="views") with self._safe_raw_sql(sql) as cur: out = cur.fetchall() - # Include temporary tables only if no database has been explicitly specified - # to avoid temp tables showing up in all calls to `list_tables` - if db == "public": - out += self._fetch_temp_tables() - return self._filter_with_like(map(itemgetter(0), out), like) - def _fetch_temp_tables(self): - # postgres temporary tables are stored in a separate schema - # so we need to independently grab them and return them along with - # the existing results - - sql = ( - sg.select("table_name") - .from_(sg.table("tables", db="information_schema")) - .distinct() - .where(C.table_type.eq(sge.convert("LOCAL TEMPORARY"))) - .sql(self.dialect) + def _list_foreign_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + table_loc = self._warn_and_create_table_loc(database) + + catalog = table_loc.catalog or self.current_catalog + database = table_loc.db or self.current_database + + col = "foreign_table_name" + where_predicates = [ + C.foreign_table_catalog.eq(sge.convert(catalog)), + C.foreign_table_schema.like(sge.convert(database)), + ] + sql = self._list_query_constructor( + col, where_predicates=where_predicates, tov="foreign_tables" ) with self._safe_raw_sql(sql) as cur: out = cur.fetchall() - return out + return self._filter_with_like(map(itemgetter(0), out), like) + + @deprecated(as_of="10.0", instead="use the con.tables") + def list_tables( + self, + like: str | None = None, + schema: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List the tables in the database.""" + + table_loc = self._warn_and_create_table_loc(database, schema) + + database = self.current_database + if table_loc is not None: + database = table_loc.db or database + + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_temp_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) + | set(self._list_temp_views(like=like, database=database)) + | set(self._list_foreign_tables(like=like, database=database)) + ) + + return tables_and_views def list_catalogs(self, like=None) -> list[str]: # http://dba.stackexchange.com/a/1304/58517 diff --git a/ibis/backends/postgres/tests/test_client.py b/ibis/backends/postgres/tests/test_client.py index 90b018f102c1..aea55c707b78 100644 --- a/ibis/backends/postgres/tests/test_client.py +++ b/ibis/backends/postgres/tests/test_client.py @@ -66,14 +66,14 @@ def test_simple_aggregate_execute(alltypes): def test_list_tables(con): - assert len(con.list_tables(like="functional")) == 1 - assert {"astronauts", "batting", "diamonds"} <= set(con.list_tables()) + assert len(con.tables(like="functional")) == 1 + assert {"astronauts", "batting", "diamonds"} <= set(con.tables) _ = con.create_table("tempy", schema=ibis.schema(dict(id="int")), temp=True) - assert "tempy" in con.list_tables() + assert "tempy" in con.tables # temp tables only show up when database='public' (or default) - assert "tempy" not in con.list_tables(database="tiger") + assert "tempy" not in con.tables(database="tiger") def test_compile_toplevel(assert_sql): @@ -196,7 +196,7 @@ def test_unknown_column_type(con, col): def test_insert_with_cte(con): X = con.create_table("X", schema=ibis.schema(dict(id="int")), temp=True) - assert "X" in con.list_tables() + assert "X" in con.tables expr = X.join(X.mutate(a=X["id"] + 1), ["id"]) Y = con.create_table("Y", expr, temp=True) assert Y.execute().empty @@ -432,3 +432,27 @@ def enum_table(con): def test_enum_table(con, enum_table): t = con.table(enum_table) assert t.mood.type() == dt.unknown + + +def test_list_foreign_table(con): + sql = """CREATE EXTENSION IF NOT EXISTS postgres_fdw; + + CREATE SERVER foreign_server + FOREIGN DATA WRAPPER postgres_fdw; + + CREATE USER MAPPING FOR CURRENT_USER + SERVER foreign_server; + + CREATE FOREIGN TABLE my_foreign_table ( + id INTEGER, + name VARCHAR + ) + SERVER foreign_server; + """ + + con.raw_sql(sql) + foreign_tables = con.ddl.list_foreign_tables() + + assert isinstance(foreign_tables, list) + assert "my_foreign_table" in foreign_tables + assert "my_foreign_table" not in con.ddl.list_tables() diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 8d93eca617ce..d2227e2f5175 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -62,7 +62,7 @@ def test_catalog_consistency(backend, con): assert current_catalog in catalogs -def test_list_all_tables_and_views(con): +def test_con_tables(con): tables = con.tables # only table that is guaranteed to be in all backends From a42f4c8365de5bdf844081db98950a790024c09d Mon Sep 17 00:00:00 2001 From: ncclementi Date: Fri, 30 Aug 2024 13:50:08 -0400 Subject: [PATCH 04/15] chore: fix bug on raising not impl --- ibis/backends/__init__.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 73bd501a86fb..a22c7e08cfd6 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -50,12 +50,11 @@ def _execute_if_exists( self, method_name: str, database=None, like=None ) -> list[str]: """Executes method if it exists and it doesn't raise a NotImplementedError, else returns an empty list.""" - method = getattr(self._backend.ddl, method_name) - if callable(method): - try: - return method(database=database, like=like) - except NotImplementedError: - pass + try: + method = getattr(self._backend.ddl, method_name) + return method(database=database, like=like) + except NotImplementedError: + pass return [] def _gather_tables(self, database=None, like=None) -> list[str]: @@ -129,11 +128,13 @@ def __init__(self, backend: BaseBackend) -> None: self._backend = backend def _raise_if_not_implemented(self, method_name: str): - method = getattr(self._backend, method_name) - if not callable(method): - raise NotImplementedError( - f"The method {method_name} is not implemented for the {self._backend.name} backend" - ) + try: + getattr(self._backend, method_name) + except AttributeError as e: + if f"has no attribute '{method_name}'" in str(e): + raise NotImplementedError( + f"The method {method_name} is not implemented for the {self._backend.name} backend" + ) def list_tables( self, like: str | None = None, database: tuple[str, str] | str | None = None From 5c22bc2bc27ecccf58a82c8f996e0d80f18f98f0 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Fri, 30 Aug 2024 18:14:34 -0400 Subject: [PATCH 05/15] refactor(api): mysql ddl accessor implementation --- ibis/backends/mysql/__init__.py | 139 +++++++++++++---------- ibis/backends/mysql/tests/test_client.py | 6 +- ibis/backends/tests/test_api.py | 16 ++- 3 files changed, 94 insertions(+), 67 deletions(-) diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 5a0ef74edbad..10894ef9b059 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -24,7 +24,8 @@ from ibis.backends import CanCreateDatabase from ibis.backends.mysql.datatypes import _type_from_cursor_info from ibis.backends.sql import SQLBackend -from ibis.backends.sql.compilers.base import STAR, TRUE, C +from ibis.backends.sql.compilers.base import STAR, C +from ibis.util import deprecated if TYPE_CHECKING: from collections.abc import Mapping @@ -289,79 +290,95 @@ def raw_sql(self, query: str | sg.Expression, **kwargs: Any) -> Any: con.commit() return cursor - # TODO: disable positional arguments - def list_tables( + def _list_query_constructor(self, col: str, where_predicates: list) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col) + .from_(sg.table("tables", db="information_schema")) + .where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + ) -> list[str]: + """Generic method to list objects like tables or views.""" + + table_loc = self._warn_and_create_table_loc(database) + + ## having an issue as it seem mysql doesn't have a self.current_catalog + ## not clear to me why, my guess is it doesn't support catalogs but unclear + # catalog = table_loc.catalog or self.current_catalog + database = table_loc.db or self.current_database + + col = "table_name" + where_predicates = [ + # C.table_catalog.eq(sge.convert(catalog)), + C.table_schema.eq(sge.convert(database)), + C.table_type.eq(object_type), + ] + + sql = self._list_query_constructor(col, where_predicates) + + with self._safe_raw_sql(sql) as cur: + out = cur.fetchall() + + return self._filter_with_like(map(itemgetter(0), out), like) + + def _list_tables( self, like: str | None = None, - schema: str | None = None, database: tuple[str, str] | str | None = None, ) -> list[str]: - """List the tables in the database. + """List physical tables.""" - ::: {.callout-note} - ## Ibis does not use the word `schema` to refer to database hierarchy. + return self._list_objects(like, database, "BASE TABLE") - A collection of tables is referred to as a `database`. - A collection of `database` is referred to as a `catalog`. + def _list_temp_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary tables.""" - These terms are mapped onto the corresponding features in each - backend (where available), regardless of whether the backend itself - uses the same terminology. - ::: + return self._list_objects(like, database, "TEMPORARY") - Parameters - ---------- - like - A pattern to use for listing tables. - schema - [deprecated] The schema to perform the list against. - database - Database to list tables from. Default behavior is to show tables in - the current database (`self.current_database`). - """ - if schema is not None: - self._warn_schema() - - if schema is not None and database is not None: - raise ValueError( - "Using both the `schema` and `database` kwargs is not supported. " - "`schema` is deprecated and will be removed in Ibis 10.0" - "\nUse the `database` kwarg with one of the following patterns:" - '\ndatabase="database"' - '\ndatabase=("catalog", "database")' - '\ndatabase="catalog.database"', - ) - elif schema is not None: - table_loc = schema - elif database is not None: - table_loc = database - else: - table_loc = self.current_database + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" - table_loc = self._to_sqlglot_table(table_loc) + return self._list_objects(like, database, "VIEW") - conditions = [TRUE] + # TODO: disable positional arguments + @deprecated(as_of="10.0", instead="use the con.tables") + def list_tables( + self, + like: str | None = None, + schema: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List the tables and views in the database.""" + table_loc = self._warn_and_create_table_loc(database, schema) - if (sg_cat := table_loc.args["catalog"]) is not None: - sg_cat.args["quoted"] = False - if (sg_db := table_loc.args["db"]) is not None: - sg_db.args["quoted"] = False - if table_loc.catalog or table_loc.db: - conditions = [C.table_schema.eq(sge.convert(table_loc.sql(self.name)))] + database = self.current_database + if table_loc is not None: + database = table_loc.db or database - col = "table_name" - sql = ( - sg.select(col) - .from_(sg.table("tables", db="information_schema")) - .distinct() - .where(*conditions) - .sql(self.name) + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_temp_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) ) - with self._safe_raw_sql(sql) as cur: - out = cur.fetchall() - - return self._filter_with_like(map(itemgetter(0), out), like) + return tables_and_views def execute( self, expr: ir.Expr, limit: str | None = "default", **kwargs: Any @@ -480,7 +497,7 @@ def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: ) # only register if we haven't already done so - if (name := op.name) not in self.list_tables(): + if (name := op.name) not in self.tables: quoted = self.compiler.quoted column_defs = [ sg.exp.ColumnDef( diff --git a/ibis/backends/mysql/tests/test_client.py b/ibis/backends/mysql/tests/test_client.py index f7877f462e46..469c68fe2d01 100644 --- a/ibis/backends/mysql/tests/test_client.py +++ b/ibis/backends/mysql/tests/test_client.py @@ -234,13 +234,13 @@ def test_list_tables_schema_warning_refactor(con): "event", "func", } - assert con.list_tables() + assert con.tables with pytest.warns(FutureWarning): assert mysql_tables.issubset(con.list_tables(schema="mysql")) - assert mysql_tables.issubset(con.list_tables(database="mysql")) - assert mysql_tables.issubset(con.list_tables(database=("mysql",))) + assert mysql_tables.issubset(con.ddl.list_tables(database="mysql")) + assert mysql_tables.issubset(con.ddl.list_tables(database=("mysql",))) def test_invalid_port(): diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index d2227e2f5175..949fbc9e64f0 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -157,7 +157,10 @@ def test_list_tables(ddl_con): assert table_name not in ddl_con.ddl.list_views() assert table_name not in ddl_con.ddl.list_temp_tables() - assert table_name not in ddl_con.ddl.list_temp_views() + try: + assert table_name not in ddl_con.ddl.list_temp_views() + except NotImplementedError: # not all backends have list_temp_views + return def test_list_views(ddl_con, temp_view): @@ -171,7 +174,10 @@ def test_list_views(ddl_con, temp_view): assert temp_view in views assert temp_view not in ddl_con.ddl.list_tables() assert temp_view not in ddl_con.ddl.list_temp_tables() - assert temp_view not in ddl_con.ddl.list_temp_views() + try: + assert temp_view not in ddl_con.ddl.list_temp_views() + except NotImplementedError: # not all backends have list_temp_views + return def test_list_temp_tables(ddl_con): @@ -183,10 +189,14 @@ def test_list_temp_tables(ddl_con): assert isinstance(temp_tables, list) assert temp_table_name in temp_tables assert temp_table_name not in ddl_con.ddl.list_views() - assert temp_table_name not in ddl_con.ddl.list_temp_views() assert temp_table_name not in ddl_con.ddl.list_tables() + try: + assert temp_table_name not in ddl_con.ddl.list_temp_views() + except NotImplementedError: # not all backends have list_temp_views + return +@pytest.mark.never("mysql", reason="mysql does not support temporary views") def test_list_temp_views(ddl_con): # TODO: replace raw_sql with create_temp From 82db0c5c4702ae3458648db36d490ac174a7992f Mon Sep 17 00:00:00 2001 From: ncclementi Date: Tue, 3 Sep 2024 21:59:17 -0400 Subject: [PATCH 06/15] refactor(api): sqlite ddl accessor implementation --- ibis/backends/sqlite/__init__.py | 127 ++++++++++++++++------ ibis/backends/sqlite/tests/test_client.py | 6 +- ibis/backends/tests/test_api.py | 1 - 3 files changed, 99 insertions(+), 35 deletions(-) diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index b9422b21d335..7f475cef2cd1 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -21,6 +21,7 @@ from ibis.backends.sql.compilers.base import C, F from ibis.backends.sqlite.converter import SQLitePandasData from ibis.backends.sqlite.udf import ignore_nulls, register_all +from ibis.util import deprecated if TYPE_CHECKING: from collections.abc import Iterator, Mapping @@ -152,45 +153,109 @@ def list_databases(self, like: str | None = None) -> list[str]: return sorted(self._filter_with_like(results, like)) + def _list_query_constructor(self, col: str, where_predicates: list) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col).from_(F.pragma_table_list()).where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + is_temp: bool = False, + ) -> list[str]: + """Generic method to list objects like tables or views.""" + + table_loc = self._warn_and_create_table_loc(database) + + # sqlite doesn't support catalogs as far as I can tell + # all temp tables are in the "temp" sqlite schema + database = table_loc.db or ("temp" if is_temp else self.current_database) + # needs to check what db I get if main or nothing when db none + + col = "name" + where_predicates = [ + C.schema.eq(sge.convert(database)), + C.type.eq(object_type), + ~( + C.name.isin( + sge.convert("sqlite_schema"), + sge.convert("sqlite_master"), + sge.convert("sqlite_temp_schema"), + sge.convert("sqlite_temp_master"), + ) + ), + ] + + sql = self._list_query_constructor(col, where_predicates) + with self._safe_raw_sql(sql) as cur: + results = [r[0] for r in cur.fetchall()] + + return sorted(self._filter_with_like(results, like)) + + def _list_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List physical tables.""" + + return self._list_objects(like, database, "table") + + def _list_temp_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary tables.""" + + return self._list_objects(like, database, "table", is_temp=True) + + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" + + return self._list_objects(like, database, "view") + + def _list_temp_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary views.""" + + return self._list_objects(like, database, "view", is_temp=True) + + @deprecated(as_of="10.0", instead="use the con.tables") def list_tables( self, like: str | None = None, database: str | None = None, ) -> list[str]: - """List the tables in the database. + """List tables and views.""" - Parameters - ---------- - like - A pattern to use for listing tables. - database - Database to list tables from. Default behavior is to show tables in - the current database. - """ - if database is None: - database = "main" + table_loc = self._warn_and_create_table_loc(database) - sql = ( - sg.select("name") - .from_(F.pragma_table_list()) - .where( - C.schema.eq(sge.convert(database)), - C.type.isin(sge.convert("table"), sge.convert("view")), - ~( - C.name.isin( - sge.convert("sqlite_schema"), - sge.convert("sqlite_master"), - sge.convert("sqlite_temp_schema"), - sge.convert("sqlite_temp_master"), - ) - ), - ) - .sql(self.name) + database = self.current_database + if table_loc is not None: + database = table_loc.db or database + + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_temp_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) + | set(self._list_temp_views(like=like, database=database)) ) - with self._safe_raw_sql(sql) as cur: - results = [r[0] for r in cur.fetchall()] - return sorted(self._filter_with_like(results, like)) + return tables_and_views def _parse_type(self, typ: str, nullable: bool) -> dt.DataType: typ = typ.lower() @@ -351,7 +416,7 @@ def _generate_create_table(self, table: sge.Table, schema: sch.Schema): def _register_in_memory_table(self, op: ops.InMemoryTable) -> None: # only register if we haven't already done so - if op.name not in self.list_tables(database="temp"): + if op.name not in self.tables(database="temp"): table = sg.table(op.name, quoted=self.compiler.quoted, catalog="temp") create_stmt = self._generate_create_table(table, op.schema).sql(self.name) df = op.data.to_frame() diff --git a/ibis/backends/sqlite/tests/test_client.py b/ibis/backends/sqlite/tests/test_client.py index d7d5def383b0..37781b2d6aac 100644 --- a/ibis/backends/sqlite/tests/test_client.py +++ b/ibis/backends/sqlite/tests/test_client.py @@ -19,13 +19,13 @@ def test_attach_file(tmp_path): client = ibis.sqlite.connect() - assert not client.list_tables() + assert not client.tables() client.attach("baz", Path(dbpath)) client.attach("bar", dbpath) - foo_tables = client.list_tables(database="baz") - bar_tables = client.list_tables(database="bar") + foo_tables = client.ddl.list_tables(database="baz") + bar_tables = client.ddl.list_tables(database="bar") assert foo_tables == ["test"] assert foo_tables == bar_tables diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 949fbc9e64f0..271cc656154c 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -147,7 +147,6 @@ def test_unbind(alltypes, expr_fn): assert "Unbound" in repr(expr.unbind()) -## works on duckdb only for now def test_list_tables(ddl_con): # should check only physical tables table_name = "functional_alltypes" From d9176f0fbc72c1453f71495086467bd1a6128d07 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Fri, 6 Sep 2024 17:02:58 -0600 Subject: [PATCH 07/15] refactor(api): datafusion ddl accessor implementation --- ibis/backends/datafusion/__init__.py | 62 +++++++++++++++++-- .../backends/datafusion/tests/test_connect.py | 9 +-- ibis/backends/tests/test_api.py | 36 +++++++---- 3 files changed, 87 insertions(+), 20 deletions(-) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index 05ec7d3bafbf..ceb237fd2852 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -286,6 +286,57 @@ def drop_database( with self._safe_raw_sql(sge.Drop(kind="SCHEMA", this=db_name, exists=force)): pass + def _list_query_constructor(self, col: str, where_predicates: list) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col) + .from_(sg.table("tables", db="information_schema")) + .where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + ) -> list[str]: + """Generic method to list objects like tables or views.""" + + database = database or "public" + + col = "table_name" + where_predicates = [ + C.table_schema.eq(sge.convert(database)), + C.table_type.eq(object_type), + ] + + sql = self._list_query_constructor(col, where_predicates) + out = self.raw_sql(sql).to_pydict() + + return self._filter_with_like(out[col], like) + + def _list_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List physical tables.""" + + return self._list_objects(like, database, "BASE TABLE") + + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" + + return self._list_objects(like, database, "VIEW") + + @deprecated(as_of="10.0", instead="use the con.tables") def list_tables( self, like: str | None = None, @@ -306,12 +357,13 @@ def list_tables( The list of the table names that match the pattern `like`. """ database = database or "public" - query = ( - sg.select("table_name") - .from_("information_schema.tables") - .where(sg.column("table_schema").eq(sge.convert(database))) + + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) ) - return self.raw_sql(query).to_pydict()["table_name"] + + return tables_and_views def get_schema( self, diff --git a/ibis/backends/datafusion/tests/test_connect.py b/ibis/backends/datafusion/tests/test_connect.py index 62526305c1d8..950a42b4b3b0 100644 --- a/ibis/backends/datafusion/tests/test_connect.py +++ b/ibis/backends/datafusion/tests/test_connect.py @@ -20,7 +20,8 @@ def name_to_path(data_dir): def test_none_config(): config = None conn = ibis.datafusion.connect(config) - assert conn.list_tables() == [] + assert conn.ddl.list_tables() == [] + assert conn.ddl.list_views() == [] def test_str_config(name_to_path): @@ -28,7 +29,7 @@ def test_str_config(name_to_path): # if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register with pytest.warns(FutureWarning, match="v9.1"): conn = ibis.datafusion.connect(config) - assert sorted(conn.list_tables()) == sorted(name_to_path) + assert sorted(conn.tables) == sorted(name_to_path) def test_path_config(name_to_path): @@ -36,7 +37,7 @@ def test_path_config(name_to_path): # if path.endswith((".parquet", ".csv", ".csv.gz")) connect triggers register with pytest.warns(FutureWarning, match="v9.1"): conn = ibis.datafusion.connect(config) - assert sorted(conn.list_tables()) == sorted(name_to_path) + assert sorted(conn.tables) == sorted(name_to_path) def test_context_config(name_to_path): @@ -44,4 +45,4 @@ def test_context_config(name_to_path): for name, path in name_to_path.items(): ctx.register_parquet(name, str(path)) conn = ibis.datafusion.connect(ctx) - assert sorted(conn.list_tables()) == sorted(name_to_path) + assert sorted(conn.tables) == sorted(name_to_path) diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index 271cc656154c..24593dfe7def 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -1,5 +1,7 @@ from __future__ import annotations +import contextlib + import pytest import sqlglot as sg import sqlglot.expressions as sge @@ -155,11 +157,14 @@ def test_list_tables(ddl_con): assert table_name in tables assert table_name not in ddl_con.ddl.list_views() - assert table_name not in ddl_con.ddl.list_temp_tables() - try: + + # not all backends have list_temp_tables + with contextlib.suppress(NotImplementedError): + assert table_name not in ddl_con.ddl.list_temp_tables() + + # not all backends have list_temp_views + with contextlib.suppress(NotImplementedError): assert table_name not in ddl_con.ddl.list_temp_views() - except NotImplementedError: # not all backends have list_temp_views - return def test_list_views(ddl_con, temp_view): @@ -172,13 +177,19 @@ def test_list_views(ddl_con, temp_view): assert isinstance(views, list) assert temp_view in views assert temp_view not in ddl_con.ddl.list_tables() - assert temp_view not in ddl_con.ddl.list_temp_tables() - try: + + # not all backends have list_temp_tables + with contextlib.suppress(NotImplementedError): + assert temp_view not in ddl_con.ddl.list_temp_tables() + + # not all backends have list_temp_views + with contextlib.suppress(NotImplementedError): assert temp_view not in ddl_con.ddl.list_temp_views() - except NotImplementedError: # not all backends have list_temp_views - return +@pytest.mark.never( + "datafusion", reason="datafusion does not support temporary views on sql" +) def test_list_temp_tables(ddl_con): expr = ddl_con.table("functional_alltypes") temp_table_name = "all_types_temp" @@ -189,12 +200,15 @@ def test_list_temp_tables(ddl_con): assert temp_table_name in temp_tables assert temp_table_name not in ddl_con.ddl.list_views() assert temp_table_name not in ddl_con.ddl.list_tables() - try: + + # not all backends have list_temp_views + with contextlib.suppress(NotImplementedError): assert temp_table_name not in ddl_con.ddl.list_temp_views() - except NotImplementedError: # not all backends have list_temp_views - return +@pytest.mark.never( + "datafusion", reason="datafusion does not support temporary views on sql" +) @pytest.mark.never("mysql", reason="mysql does not support temporary views") def test_list_temp_views(ddl_con): # TODO: replace raw_sql with create_temp From 43b967fa2aed2a40c7b202128926f3a1df694ad8 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Fri, 20 Sep 2024 20:22:12 -0400 Subject: [PATCH 08/15] chore: fix some problems after merge --- ibis/backends/duckdb/__init__.py | 4 ++-- ibis/backends/duckdb/tests/test_client.py | 8 ++++---- ibis/backends/mysql/__init__.py | 2 +- ibis/backends/postgres/__init__.py | 6 +++--- ibis/backends/sqlite/__init__.py | 4 ++-- ibis/backends/sqlite/tests/test_client.py | 4 ++-- ibis/backends/tests/test_client.py | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index a7def68d0747..747972fcaf08 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -999,7 +999,7 @@ def _list_objects( ) -> list[str]: """Generic method to list objects like tables or views.""" - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) catalog = table_loc.catalog or ("temp" if is_temp else self.current_catalog) database = table_loc.db or self.current_database @@ -1052,7 +1052,7 @@ def _list_temp_views( return self._list_objects(like, database, "VIEW", is_temp=True) - @deprecated(as_of="10.0", instead="use the con.tables") + # @deprecated(as_of="10.0", instead="use the con.tables") def list_tables( self, like: str | None = None, diff --git a/ibis/backends/duckdb/tests/test_client.py b/ibis/backends/duckdb/tests/test_client.py index ad65cdcbb50d..a0483e537460 100644 --- a/ibis/backends/duckdb/tests/test_client.py +++ b/ibis/backends/duckdb/tests/test_client.py @@ -293,8 +293,8 @@ def test_list_tables(con): icecream_table = ["ice_cream"] - assert con.list_tables(database="shops") == icecream_table - assert con.list_tables(database=("shops",)) == icecream_table + assert con.ddl.list_tables(database="shops") == icecream_table + assert con.ddl.list_tables(database=("shops",)) == icecream_table def test_settings_repr(): @@ -416,7 +416,7 @@ def test_read_csv_with_types(tmp_path, input, all_varchar): def test_memtable_doesnt_leak(con, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) name = "memtable_doesnt_leak" - assert name not in con.list_tables() + assert name not in con.ddl.list_tables() df = ibis.memtable({"a": [1, 2, 3]}, name=name).execute() - assert name not in con.list_tables() + assert name not in con.ddl.list_tables() assert len(df) == 3 diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 9d7c65eb9a94..0a723a498180 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -329,7 +329,7 @@ def _list_objects( ) -> list[str]: """Generic method to list objects like tables or views.""" - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) ## having an issue as it seem mysql doesn't have a self.current_catalog ## not clear to me why, my guess is it doesn't support catalogs but unclear diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index baa291bc0c5a..674ca26c4462 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -325,7 +325,7 @@ def _list_objects( (not used for temporary views) """ - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) catalog = table_loc.catalog or self.current_catalog # temporary tables and views are in a separate postgres schema @@ -406,7 +406,7 @@ def _list_temp_views( ) -> list[str]: """List temporary views.""" - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) catalog = table_loc.catalog or self.current_catalog # temporary tables and views are in a separate postgres table_schema that @@ -434,7 +434,7 @@ def _list_foreign_tables( like: str | None = None, database: tuple[str, str] | str | None = None, ) -> list[str]: - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) catalog = table_loc.catalog or self.current_catalog database = table_loc.db or self.current_database diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index 930716c34b53..4fb6b39b173b 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -178,7 +178,7 @@ def _list_objects( ) -> list[str]: """Generic method to list objects like tables or views.""" - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) # sqlite doesn't support catalogs as far as I can tell # all temp tables are in the "temp" sqlite schema @@ -247,7 +247,7 @@ def list_tables( ) -> list[str]: """List tables and views.""" - table_loc = self._warn_and_create_table_loc(database) + table_loc = self._to_sqlglot_table(database) database = self.current_database if table_loc is not None: diff --git a/ibis/backends/sqlite/tests/test_client.py b/ibis/backends/sqlite/tests/test_client.py index ad484189d449..a83b46e9a2d3 100644 --- a/ibis/backends/sqlite/tests/test_client.py +++ b/ibis/backends/sqlite/tests/test_client.py @@ -93,5 +93,5 @@ def test_has_operation(con): def test_list_temp_tables_by_default(con): name = ibis.util.gen_name("sqlite_temp_table") con.create_table(name, schema={"a": "int"}, temp=True) - assert name in con.list_tables(database="temp") - assert name in con.list_tables() + assert name in con.ddl.list_temp_tables(database="temp") + assert name in con.tables() diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index e2ce80000971..c73578097779 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1003,7 +1003,7 @@ def test_create_table_in_memory(con, obj, table_name, monkeypatch): t = con.create_table(table_name, obj()) try: - assert table_name in con.list_tables() + assert table_name in con.ddl.list_tables() assert pa.table({"a": ["a"], "b": [1]}).equals(t.to_pyarrow()) finally: con.drop_table(table_name, force=True) From 0d6673f64b793204172ef582ef776aee9fcd686f Mon Sep 17 00:00:00 2001 From: ncclementi Date: Sun, 22 Sep 2024 13:46:48 -0400 Subject: [PATCH 09/15] chore: fix test_list_tables mysql --- ibis/backends/mysql/tests/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/backends/mysql/tests/test_client.py b/ibis/backends/mysql/tests/test_client.py index ad82c97b13ec..bcac77d36b03 100644 --- a/ibis/backends/mysql/tests/test_client.py +++ b/ibis/backends/mysql/tests/test_client.py @@ -236,8 +236,8 @@ def test_list_tables(con): } assert con.tables - assert mysql_tables.issubset(con.list_tables(database="mysql")) - assert mysql_tables.issubset(con.list_tables(database=("mysql",))) + assert mysql_tables.issubset(con.ddl.list_tables(database="mysql")) + assert mysql_tables.issubset(con.ddl.list_tables(database=("mysql",))) def test_invalid_port(): From 96fa24af1c1d45d8cd96c510bed345c5c43fb603 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Sun, 22 Sep 2024 13:54:43 -0400 Subject: [PATCH 10/15] test: replace lookup of list_tables to DDLAccesor --- ibis/backends/tests/test_signatures.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_signatures.py b/ibis/backends/tests/test_signatures.py index d1065867c0bf..d9be7cf74900 100644 --- a/ibis/backends/tests/test_signatures.py +++ b/ibis/backends/tests/test_signatures.py @@ -10,6 +10,7 @@ CanCreateDatabase, CanListCatalog, CanListDatabase, + DDLAccessor, ) from ibis.backends.sql import SQLBackend from ibis.backends.tests.signature.typecheck import compatible @@ -100,7 +101,7 @@ def _scrape_methods(modules, params): ), ), "list_tables": pytest.param( - BaseBackend, + DDLAccessor, # Replacing BaseBackend with DDLAccessor "list_tables", marks=pytest.mark.notyet(["flink"]), ), From d7273a32c73b1bb9de3d22cec08b278ecf4e07aa Mon Sep 17 00:00:00 2001 From: ncclementi Date: Mon, 23 Sep 2024 20:08:17 -0400 Subject: [PATCH 11/15] chore: fix typo --- ibis/backends/duckdb/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 747972fcaf08..a05d4395c136 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1052,7 +1052,7 @@ def _list_temp_views( return self._list_objects(like, database, "VIEW", is_temp=True) - # @deprecated(as_of="10.0", instead="use the con.tables") + @deprecated(as_of="10.0", instead="use the con.tables") def list_tables( self, like: str | None = None, From b84431928478d7952034f775f77d2952eefc13e6 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Mon, 23 Sep 2024 20:27:11 -0400 Subject: [PATCH 12/15] refactor(api): clickhouse ddl accessor implementation --- ibis/backends/__init__.py | 7 ++ ibis/backends/clickhouse/__init__.py | 110 +++++++++++++++--- ibis/backends/clickhouse/tests/test_client.py | 28 ++++- ibis/backends/tests/test_api.py | 3 +- 4 files changed, 125 insertions(+), 23 deletions(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index 7a49d32f4e7b..efd11bf1fcc7 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -69,6 +69,7 @@ def _gather_tables(self, database=None, like=None) -> list[str]: "list_views", "list_temp_views", "list_foreign_tables", + "list_materialized_views", ] tables = [] for method_name in list_methods: @@ -250,6 +251,12 @@ def list_temp_views( self._raise_if_not_implemented("_list_temp_views") return self._backend._list_temp_views(like=like, database=database) + def list_materialized_views( + self, like: str | None = None, database: tuple[str, str] | str | None = None + ) -> list[str]: + self._raise_if_not_implemented("_list_materialized_views") + return self._backend._list_materialized_views(like=like, database=database) + def list_foreign_tables( self, like: str | None = None, database: tuple[str, str] | str | None = None ) -> list[str]: diff --git a/ibis/backends/clickhouse/__init__.py b/ibis/backends/clickhouse/__init__.py index be3334373594..74b5e66b5639 100644 --- a/ibis/backends/clickhouse/__init__.py +++ b/ibis/backends/clickhouse/__init__.py @@ -30,6 +30,7 @@ from ibis.backends.clickhouse.converter import ClickHousePandasData from ibis.backends.sql import SQLBackend from ibis.backends.sql.compilers.base import C +from ibis.util import deprecated if TYPE_CHECKING: from collections.abc import Iterable, Iterator, Mapping @@ -206,38 +207,111 @@ def list_databases(self, like: str | None = None) -> list[str]: databases = [] return self._filter_with_like(databases, like) - def list_tables( - self, like: str | None = None, database: str | None = None + def _list_query_constructor(self, col: str, where_predicates: list) -> str: + """Helper function to construct sqlglot queries for _list_* methods.""" + + sg_query = ( + sg.select(col) + .from_(sg.table("tables", db="system")) + .where(*where_predicates) + ).sql(self.name) + + return sg_query + + def _list_objects( + self, + like: str | None, + database: tuple[str, str] | str | None, + object_type: str, + is_temp: bool = False, ) -> list[str]: - """List the tables in the database. + """Generic method to list objects like tables or views.""" - Parameters - ---------- - like - A pattern to use for listing tables. - database - Database to list tables from. Default behavior is to show tables in - the current database. - """ + table_loc = self._to_sqlglot_table(database) - query = sg.select(C.name).from_(sg.table("tables", db="system")) + # when is_temp=True the table is in memory and no database is assigned + database = table_loc.db or ("" if is_temp else self.current_database) + col = "name" - if database is None: - database = self.compiler.f.currentDatabase() + if object_type == "Table": # we have to check is not a view or mat view + where_predicates = [ + C.database.eq(sge.convert(database)), + C.engine.isin("View", "MaterializedView").not_(), + C.is_temporary.eq(int(is_temp)), + ] else: - database = sge.convert(database) - - query = query.where(C.database.eq(database).or_(C.is_temporary)) + where_predicates = [ + C.database.eq(sge.convert(database)), + C.engine.eq(object_type), + C.is_temporary.eq(int(is_temp)), + ] - with self._safe_raw_sql(query) as result: + sql = self._list_query_constructor(col, where_predicates) + with self._safe_raw_sql(sql) as result: results = result.result_columns if results: (tables,) = results else: tables = [] + return self._filter_with_like(tables, like) + def _list_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List physical tables.""" + + return self._list_objects(like, database, "Table") + + def _list_temp_tables( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List temporary tables.""" + + return self._list_objects(like, database, "Table", is_temp=True) + + def _list_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" + + return self._list_objects(like, database, "View") + + def _list_materialized_views( + self, + like: str | None = None, + database: tuple[str, str] | str | None = None, + ) -> list[str]: + """List views.""" + + return self._list_objects(like, database, "MaterializedView") + + @deprecated(as_of="10.0", instead="use the con.tables") + def list_tables( + self, like: str | None = None, database: str | None = None + ) -> list[str]: + table_loc = self._to_sqlglot_table(database) + + database = self.current_database + if table_loc is not None: + database = table_loc.db or database + + tables_and_views = list( + set(self._list_tables(like=like, database=database)) + | set(self._list_temp_tables(like=like, database=database)) + | set(self._list_views(like=like, database=database)) + | set(self._list_materialized_views(like=like, database=database)) + ) + + return tables_and_views + def _normalize_external_tables(self, external_tables=None) -> ExternalData | None: """Merge registered external tables with any new external tables.""" external_data = ExternalData() diff --git a/ibis/backends/clickhouse/tests/test_client.py b/ibis/backends/clickhouse/tests/test_client.py index 270b305f3aeb..fcde3af85f7e 100644 --- a/ibis/backends/clickhouse/tests/test_client.py +++ b/ibis/backends/clickhouse/tests/test_client.py @@ -193,8 +193,8 @@ def test_get_schema_using_query(con, query, expected_schema): def test_list_tables_database(con): - tables = con.list_tables() - tables2 = con.list_tables(database=con.current_database) + tables = con.ddl.list_tables() + tables2 = con.ddl.list_tables(database=con.current_database) # some overlap, but not necessarily identical because # a database may have temporary tables added/removed between list_tables # calls @@ -216,7 +216,7 @@ def tmpcon(worker_id): def test_list_tables_empty_database(tmpcon): - assert not tmpcon.list_tables() + assert not tmpcon.ddl.list_tables() @pytest.mark.parametrize("temp", [True, False], ids=["temp", "no_temp"]) @@ -453,3 +453,25 @@ def test_query_cache(con, method_name): method(settings={"ooze_query_cash": True}) assert result == expected + + +def test_list_materialized_views(con): + con.create_table( + "my_table", + {"id": [1, 2, 3], "val": ["a", "b", "c"]}, + schema=ibis.schema({"id": "!int", "val": "!str"}), + overwrite=True, + ) + + # drop mat view if exists, then create + # handles materialized views as tables for dropping purposes + con.raw_sql("""DROP TABLE IF EXISTS mat_view_example""") + # it doesn't like both statements in one + con.raw_sql( + """CREATE MATERIALIZED VIEW mat_view_example ENGINE=MergeTree() ORDER BY id AS SELECT * FROM my_table""" + ) + + assert "mat_view_example" in con.ddl.list_materialized_views() + assert "mat_view_example" not in con.ddl.list_views() + assert "mat_view_example" not in con.ddl.list_tables() + assert "mat_view_example" not in con.ddl.list_temp_tables() diff --git a/ibis/backends/tests/test_api.py b/ibis/backends/tests/test_api.py index a33381cc69cf..29ad03995074 100644 --- a/ibis/backends/tests/test_api.py +++ b/ibis/backends/tests/test_api.py @@ -201,9 +201,8 @@ def test_list_temp_tables(ddl_con): @pytest.mark.never( - "datafusion", reason="datafusion does not support temporary views on sql" + ["datafusion", "mysql", "clickhouse"], reason="does not support temporary views" ) -@pytest.mark.never("mysql", reason="mysql does not support temporary views") def test_list_temp_views(ddl_con): # TODO: replace raw_sql with create_temp From 0985de11cb5cb237c9431c845cfce58ae0c2f973 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Mon, 23 Sep 2024 20:53:25 -0400 Subject: [PATCH 13/15] chore: fix doctests --- ibis/backends/datafusion/__init__.py | 2 +- ibis/backends/duckdb/__init__.py | 4 ++-- ibis/backends/mysql/__init__.py | 2 +- ibis/backends/postgres/__init__.py | 2 +- ibis/backends/sqlite/__init__.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index ba59cc08423a..addf56e5531b 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -99,7 +99,7 @@ def do_connect( ... "diamonds": "ci/ibis-testing-data/csv/diamonds.csv", ... } >>> con = ibis.datafusion.connect(config) - >>> con.list_tables() + >>> con.tables() ['astronauts', 'diamonds'] >>> con.table("diamonds") DatabaseTable: diamonds diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index a05d4395c136..f1e0d409e9f9 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1286,10 +1286,10 @@ def attach_sqlite( ... ) # doctest: +ELLIPSIS <...> >>> con = ibis.connect("duckdb://") - >>> con.list_tables() + >>> con.tables() [] >>> con.attach_sqlite("/tmp/attach_sqlite.db") - >>> con.list_tables() + >>> con.tables() ['t'] """ diff --git a/ibis/backends/mysql/__init__.py b/ibis/backends/mysql/__init__.py index 0a723a498180..b806c69f0672 100644 --- a/ibis/backends/mysql/__init__.py +++ b/ibis/backends/mysql/__init__.py @@ -125,7 +125,7 @@ def do_connect( >>> password = os.environ.get("IBIS_TEST_MYSQL_PASSWORD", "ibis") >>> database = os.environ.get("IBIS_TEST_MYSQL_DATABASE", "ibis_testing") >>> con = ibis.mysql.connect(database=database, host=host, user=user, password=password) - >>> con.list_tables() # doctest: +ELLIPSIS + >>> con.tables() # doctest: +ELLIPSIS [...] >>> t = con.table("functional_alltypes") >>> t diff --git a/ibis/backends/postgres/__init__.py b/ibis/backends/postgres/__init__.py index 674ca26c4462..57f9b8b4a83b 100644 --- a/ibis/backends/postgres/__init__.py +++ b/ibis/backends/postgres/__init__.py @@ -230,7 +230,7 @@ def do_connect( >>> password = os.environ.get("IBIS_TEST_POSTGRES_PASSWORD", "postgres") >>> database = os.environ.get("IBIS_TEST_POSTGRES_DATABASE", "ibis_testing") >>> con = ibis.postgres.connect(database=database, host=host, user=user, password=password) - >>> con.list_tables() # doctest: +ELLIPSIS + >>> con.tables() # doctest: +ELLIPSIS [...] >>> t = con.table("functional_alltypes") >>> t diff --git a/ibis/backends/sqlite/__init__.py b/ibis/backends/sqlite/__init__.py index 4fb6b39b173b..e6c2ef5a6f32 100644 --- a/ibis/backends/sqlite/__init__.py +++ b/ibis/backends/sqlite/__init__.py @@ -487,7 +487,7 @@ def attach(self, name: str, path: str | Path) -> None: >>> con1 = ibis.sqlite.connect("/tmp/original.db") >>> con2 = ibis.sqlite.connect("/tmp/new.db") >>> con1.attach("new", "/tmp/new.db") - >>> con1.list_tables(database="new") + >>> con1.tables(database="new") [] """ with self.begin() as cur: From 68abf01890625907fe149de14c3634ebdec1e34b Mon Sep 17 00:00:00 2001 From: ncclementi Date: Tue, 24 Sep 2024 10:21:09 -0400 Subject: [PATCH 14/15] chore: order table names in datafusion --- ibis/backends/datafusion/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ibis/backends/datafusion/__init__.py b/ibis/backends/datafusion/__init__.py index addf56e5531b..57fbe02989fa 100644 --- a/ibis/backends/datafusion/__init__.py +++ b/ibis/backends/datafusion/__init__.py @@ -300,6 +300,7 @@ def _list_query_constructor(self, col: str, where_predicates: list) -> str: sg.select(col) .from_(sg.table("tables", db="information_schema")) .where(*where_predicates) + .order_by("table_name") # in datafusion we ordered the tables ).sql(self.name) return sg_query From 7b47cc219832a6f30ebbf3ac29ff80a5aaec12e9 Mon Sep 17 00:00:00 2001 From: ncclementi Date: Tue, 24 Sep 2024 12:03:57 -0400 Subject: [PATCH 15/15] chore: remove unncessary set and list operation --- ibis/backends/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index efd11bf1fcc7..830ebfde1959 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -76,7 +76,7 @@ def _gather_tables(self, database=None, like=None) -> list[str]: tables.extend( self._execute_if_exists(method_name, database=database, like=like) ) - return list(set(tables)) + return tables def __call__(self, database=None, like=None): return self._gather_tables(database, like)