"""Transpile GIQL queries to SQL.
This module provides the main entry point for transpiling GIQL queries
to standard SQL.
"""
from contextlib import contextmanager
from typing import Iterator
from typing import Literal
from typing import overload
from sqlglot import parse_one
from giql.dialect import GIQLDialect
from giql.generators import BaseGIQLGenerator
from giql.table import Table
from giql.table import Tables
from giql.transformer import ClusterTransformer
from giql.transformer import IntersectsBinnedJoinTransformer
from giql.transformer import IntersectsDuckDBIEJoinTransformer
from giql.transformer import MergeTransformer
@overload
def transpile(
giql: str,
tables: list[str | Table] | None = None,
*,
dialect: None = None,
intersects_bin_size: int | None = None,
) -> str: ...
@overload
def transpile(
giql: str,
tables: list[str | Table] | None = None,
*,
dialect: Literal["duckdb"],
intersects_bin_size: None = None,
) -> str: ...
[docs]
def transpile(
giql: str,
tables: list[str | Table] | None = None,
*,
dialect: Literal["duckdb"] | None = None,
intersects_bin_size: int | None = None,
) -> str:
"""Transpile a GIQL query to SQL.
Parses the GIQL syntax and converts it to standard SQL-92 compatible
output (uses LATERAL joins where needed for operations like NEAREST).
Parameters
----------
giql : str
The GIQL query string containing genomic extensions like
INTERSECTS, CONTAINS, WITHIN, CLUSTER, MERGE, NEAREST, or DISJOIN.
tables : list[str | :class:`Table`] | None
Table configurations. Strings use default column mappings
(chrom, start, end, strand). :class:`Table` objects provide
custom column name mappings.
dialect : Literal["duckdb"] | None
Optional target dialect. When set to ``"duckdb"``, column-to-column
``INTERSECTS`` joins (INNER, SEMI, or ANTI) are transpiled into a
per-chromosome dynamic-SQL pattern (``SET VARIABLE`` +
``query(getvariable(...))``) that DuckDB plans through its
range-join family (``IE_JOIN`` / ``PIECEWISE_MERGE_JOIN``).
Mutually exclusive with ``intersects_bin_size``. Defaults to
``None`` (the generic binned equi-join path). Hard-error projection
shapes raise ``ValueError`` at transpile time; see the performance
guide for the full enumeration.
intersects_bin_size : int | None
Bin size for INTERSECTS equi-join optimization. When a query
contains a full-table column-to-column INTERSECTS join, the
transpiler rewrites it as a binned equi-join for performance.
Defaults to 10,000 if not specified. Cannot be combined with
``dialect="duckdb"``.
Returns
-------
str
The transpiled SQL query.
Raises
------
ValueError
If the query cannot be parsed or transpiled, if ``dialect`` is
unknown, or if ``dialect="duckdb"`` and ``intersects_bin_size``
are both set.
Examples
--------
Basic usage with default column mappings::
sql = transpile(
"SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'",
tables=["peaks"],
)
Custom :class:`Table` configuration::
sql = transpile(
"SELECT * FROM peaks WHERE interval INTERSECTS 'chr1:1000-2000'",
tables=[
Table(
"peaks",
genomic_col="interval",
chrom_col="chrom",
start_col="start",
end_col="end",
)
],
)
Binned equi-join with custom bin size::
sql = transpile(
"SELECT a.*, b.* FROM peaks a JOIN genes b "
"ON a.interval INTERSECTS b.interval",
tables=["peaks", "genes"],
intersects_bin_size=100000,
)
DuckDB IEJoin dialect (column-to-column INNER/SEMI/ANTI JOIN only;
projections must be qualified)::
sql = transpile(
"SELECT a.chrom, a.start, b.start "
"FROM peaks a JOIN genes b ON a.interval INTERSECTS b.interval",
tables=["peaks", "genes"],
dialect="duckdb",
)
"""
if dialect is not None and dialect != "duckdb":
raise ValueError(f"Unknown dialect: {dialect!r}. Supported: 'duckdb' or None.")
if dialect == "duckdb" and intersects_bin_size is not None:
raise ValueError(
"intersects_bin_size has no effect with dialect='duckdb'; "
"the DuckDB dialect uses an IEJoin per-partition plan instead "
"of the binned equi-join. Pass one or the other, not both."
)
tables_container = _build_tables(tables)
with _reraise_as_value_error("Parse error", query=giql):
ast = parse_one(giql, dialect=GIQLDialect)
# Falls back to the binned plan for unsupported shapes — see
# IntersectsDuckDBIEJoinTransformer.transform_to_sql for the complete
# fallback set.
if dialect == "duckdb":
duckdb_transformer = IntersectsDuckDBIEJoinTransformer(tables_container)
with _reraise_as_value_error("Transformation error"):
duckdb_sql = duckdb_transformer.transform_to_sql(ast)
if duckdb_sql is not None:
return duckdb_sql
intersects_transformer = IntersectsBinnedJoinTransformer(
tables_container,
bin_size=intersects_bin_size,
)
merge_transformer = MergeTransformer(tables_container)
cluster_transformer = ClusterTransformer(tables_container)
generator = BaseGIQLGenerator(tables=tables_container)
with _reraise_as_value_error("Transformation error"):
ast = intersects_transformer.transform(ast)
ast = merge_transformer.transform(ast)
ast = cluster_transformer.transform(ast)
with _reraise_as_value_error("Transpilation error"):
sql = generator.generate(ast)
return sql
def _build_tables(tables: list[str | Table] | None) -> Tables:
"""Build a :class:`Tables` container from table specifications.
Parameters
----------
tables : list[str | :class:`Table`] | None
Table specifications. Strings use default column mappings.
:class:`Table` objects provide custom column mappings.
Returns
-------
Tables
Container with all tables registered.
"""
container = Tables()
if tables is None:
return container
for item in tables:
if isinstance(item, str):
container.register(item, Table(item))
else:
container.register(item.name, item)
return container
@contextmanager
def _reraise_as_value_error(prefix: str, query: str | None = None) -> Iterator[None]:
"""Re-raise non-:class:`ValueError` exceptions as :class:`ValueError` with *prefix*.
Lets user-facing :class:`ValueError`\\s from the parser, transformer chain,
and generator propagate verbatim (so the dialect's targeted error messages
survive the boundary) while wrapping unexpected exceptions in a uniform
:class:`ValueError` prefixed with the stage name. When *query* is supplied,
the original input is appended to the message so parse errors retain the
offending text.
"""
try:
yield
except ValueError:
raise
except Exception as e:
msg = f"{prefix}: {e}"
if query is not None:
msg += f"\nQuery: {query}"
raise ValueError(msg) from e