cartography-cncf
diff --git a/‎cartography/intel/gcp/__init__.py‎
Lines changed: 63 additions & 1 deletion b/‎cartography/intel/gcp/__init__.py‎
Lines changed: 63 additions & 1 deletion
diff --git a/‎cartography/intel/gcp/bigquery_connection.py‎
Lines changed: 216 additions & 0 deletions b/‎cartography/intel/gcp/bigquery_connection.py‎
Lines changed: 216 additions & 0 deletions
@@ -15,6 +15,10 @@
 from cartography.config import Config
 from cartography.graph.job import GraphJob
 from cartography.intel.gcp import artifact_registry
+from cartography.intel.gcp import bigquery_connection
+from cartography.intel.gcp import bigquery_dataset
+from cartography.intel.gcp import bigquery_routine
+from cartography.intel.gcp import bigquery_table
 from cartography.intel.gcp import bigtable_app_profile
 from cartography.intel.gcp import bigtable_backup
 from cartography.intel.gcp import bigtable_cluster
@@ -65,7 +69,7 @@
 # and https://cloud.google.com/service-usage/docs/reference/rest/v1/services#ServiceConfig
 Services = namedtuple(
     "Services",
-    "compute storage gke dns iam kms bigtable cai aiplatform cloud_sql gcf secretsmanager artifact_registry cloud_run",
+    "compute storage gke dns iam kms bigtable cai aiplatform cloud_sql gcf secretsmanager artifact_registry cloud_run bigquery bigquery_connection",
 )
 service_names = Services(
     compute="compute.googleapis.com",
@@ -82,6 +86,8 @@
     secretsmanager="secretmanager.googleapis.com",
     artifact_registry="artifactregistry.googleapis.com",
     cloud_run="run.googleapis.com",
+    bigquery="bigquery.googleapis.com",
+    bigquery_connection="bigqueryconnection.googleapis.com",
 )
 
 
@@ -549,6 +555,62 @@ def _sync_project_resources(
                 common_job_parameters,
             )
 
+        # Build the BigQuery v2 client once — used for datasets/tables/routines
+        # and also for location discovery when syncing connections.
+        bigquery_client = None
+        if service_names.bigquery in enabled_services:
+            bigquery_client = build_client(
+                "bigquery",
+                "v2",
+                credentials=credentials,
+            )
+
+        if service_names.bigquery_connection in enabled_services:
+            logger.info("Syncing GCP project %s for BigQuery connections.", project_id)
+            bigquery_conn_client = build_client(
+                "bigqueryconnection",
+                "v1",
+                credentials=credentials,
+            )
+            bigquery_connection.sync_bigquery_connections(
+                neo4j_session,
+                bigquery_conn_client,
+                project_id,
+                gcp_update_tag,
+                common_job_parameters,
+                bigquery_client=bigquery_client,
+            )
+
+        datasets_raw = None
+        if bigquery_client is not None:
+            logger.info("Syncing GCP project %s for BigQuery.", project_id)
+            datasets_raw = bigquery_dataset.sync_bigquery_datasets(
+                neo4j_session,
+                bigquery_client,
+                project_id,
+                gcp_update_tag,
+                common_job_parameters,
+            )
+
+        if bigquery_client is not None and datasets_raw is not None:
+            bigquery_table.sync_bigquery_tables(
+                neo4j_session,
+                bigquery_client,
+                datasets_raw,
+                project_id,
+                gcp_update_tag,
+                common_job_parameters,
+            )
+
+            bigquery_routine.sync_bigquery_routines(
+                neo4j_session,
+                bigquery_client,
+                datasets_raw,
+                project_id,
+                gcp_update_tag,
+                common_job_parameters,
+            )
+
         # Clean up project-level IAM resources (service accounts and project roles)
         # Only run cleanup if IAM sync succeeded to avoid deleting valid data
         # when sync was skipped due to permission issues.
 
@@ -0,0 +1,216 @@
+import logging
+
+import neo4j
+from googleapiclient.discovery import Resource
+from googleapiclient.errors import HttpError
+
+from cartography.client.core.tx import load
+from cartography.graph.job import GraphJob
+from cartography.intel.gcp.util import gcp_api_execute_with_retry
+from cartography.intel.gcp.util import is_api_disabled_error
+from cartography.models.gcp.bigquery.connection import GCPBigQueryConnectionSchema
+from cartography.util import timeit
+
+logger = logging.getLogger(__name__)
+
+
+def _get_locations(bigquery_client: Resource, project_id: str) -> list[str]:
+    """
+    Build the list of BigQuery locations to query for connections.
+
+    Locations are discovered indirectly: every dataset returned by the
+    BigQuery v2 ``datasets.list`` call (requested with ``all=True``) contributes
+    its ``location``, and the ``us`` and ``eu`` multi-regions are always added
+    so they are queried even when the project has no datasets in them.
+
+    Discovery is best effort. If listing datasets raises ``HttpError`` the
+    failure is logged and whatever was collected up to that point (at minimum
+    the two defaults) is returned instead of propagating the error.
+
+    Args:
+        bigquery_client: The BigQuery v2 API client.
+        project_id: The GCP project ID whose datasets are listed.
+
+    Returns:
+        A deduplicated, lower-cased, sorted list of location IDs
+        (e.g., ["eu", "us", "us-central1"]). Sorting keeps the order of the
+        downstream per-location API calls and log lines stable between runs.
+    """
+    # Multi-region locations that are always queried; connections can exist
+    # in them even when the project has no datasets there.
+    # See https://cloud.google.com/bigquery/docs/locations
+    locations: set[str] = {"us", "eu"}
+
+    # Discover additional locations from existing datasets
+    try:
+        request = bigquery_client.datasets().list(projectId=project_id, all=True)
+        while request is not None:
+            response = gcp_api_execute_with_retry(request)
+            for ds in response.get("datasets", []):
+                loc = ds.get("location")
+                if loc:
+                    # Normalise case so "US" and "us" collapse to one entry.
+                    locations.add(loc.lower())
+            # list_next yields None once the last page has been consumed.
+            request = bigquery_client.datasets().list_next(
+                previous_request=request,
+                previous_response=response,
+            )
+    except HttpError as e:
+        # Logged as a warning rather than debug: a failed discovery narrows
+        # the set of locations that get queried, which operators need to see.
+        logger.warning(
+            "Could not list datasets to discover locations for project %s - %s. "
+            "Using default locations only.",
+            project_id,
+            e,
+        )
+
+    return sorted(locations)
+
+
+@timeit
+def get_bigquery_connections(
+    conn_client: Resource,
+    project_id: str,
+    bigquery_client: Resource | None = None,
+) -> list[dict] | None:
+    """
+    Gets BigQuery connections for a project across all known locations.
+
+    Connections are listed one location at a time. Locations come from
+    ``_get_locations`` when a BigQuery v2 client is supplied; otherwise only
+    the default ``us`` and ``eu`` locations are queried.
+
+    Args:
+        conn_client: The bigqueryconnection v1 API client.
+        project_id: The GCP project ID.
+        bigquery_client: Optional BigQuery v2 API client for location discovery.
+            If not provided, only default locations (us, eu) are queried.
+
+    Returns:
+        list[dict]: Connections gathered from every location that was queried.
+            Locations that answer with HTTP 404 are skipped.
+        None: If the API is not enabled or access is denied (HTTP 403). No
+            partial result is returned in that case, so callers can tell
+            "nothing could be read" apart from "there are no connections" and
+            avoid running cleanup against an incomplete listing.
+
+    Raises:
+        HttpError: For errors other than API disabled, permission denied or
+            not found.
+    """
+    if bigquery_client is not None:
+        locations = _get_locations(bigquery_client, project_id)
+    else:
+        locations = ["us", "eu"]
+
+    connections_api = conn_client.projects().locations().connections()
+
+    connections: list[dict] = []
+    for location in locations:
+        parent = f"projects/{project_id}/locations/{location}"
+        try:
+            request = connections_api.list(parent=parent)
+            while request is not None:
+                response = gcp_api_execute_with_retry(request)
+                connections.extend(response.get("connections", []))
+                # list_next yields None once the last page has been consumed.
+                request = connections_api.list_next(
+                    previous_request=request,
+                    previous_response=response,
+                )
+        except HttpError as e:
+            if is_api_disabled_error(e) or e.resp.status == 403:
+                # The listing is unreadable, not empty: signal that with None
+                # instead of returning a list that looks authoritative.
+                logger.warning(
+                    "Could not retrieve BigQuery connections for %s/%s - %s. "
+                    "Skipping sync to preserve existing data.",
+                    project_id,
+                    location,
+                    e,
+                )
+                return None
+            if e.resp.status == 404:
+                # This location is not available for the project; the
+                # remaining locations can still be listed.
+                logger.warning(
+                    "Could not retrieve BigQuery connections for %s/%s - %s. "
+                    "Skipping location.",
+                    project_id,
+                    location,
+                    e,
+                )
+                continue
+            raise
+
+    return connections
+
+
+def transform_connections(connections_data: list[dict], project_id: str) -> list[dict]:
+    """
+    Flatten raw BigQuery connection resources into dicts ready for loading.
+
+    For each connection the type is taken from the first type-specific key
+    present in the resource, and a handful of type-specific identifiers are
+    lifted to top-level fields. Fields that do not apply to a connection's
+    type are set to ``None``.
+
+    Args:
+        connections_data: Connection resources as returned by the
+            bigqueryconnection API.
+        project_id: The GCP project ID the connections belong to; copied
+            onto every output row as ``project_id``.
+
+    Returns:
+        One dict per input connection, in input order.
+
+    Raises:
+        KeyError: If a connection has no ``name``.
+    """
+    # Type-specific keys of a connection resource, in the order they are
+    # probed; the first one present decides ``connection_type``.
+    type_keys = (
+        "cloudSql",
+        "aws",
+        "azure",
+        "cloudSpanner",
+        "cloudResource",
+        "spark",
+        "salesforceDataCloud",
+    )
+
+    transformed: list[dict] = []
+    for conn in connections_data:
+        connection_type = next((key for key in type_keys if key in conn), None)
+
+        # ``or {}`` covers both a missing key and an explicit null value, so
+        # the nested lookups below never run against None.
+        cloud_sql = conn.get("cloudSql") or {}
+        aws = conn.get("aws") or {}
+        access_role = aws.get("accessRole") or {}
+        azure = conn.get("azure") or {}
+        cloud_resource = conn.get("cloudResource") or {}
+        transformed.append(
+            {
+                "name": conn["name"],
+                "friendlyName": conn.get("friendlyName"),
+                "description": conn.get("description"),
+                "connection_type": connection_type,
+                "creationTime": conn.get("creationTime"),
+                "lastModifiedTime": conn.get("lastModifiedTime"),
+                "hasCredential": conn.get("hasCredential"),
+                "cloud_sql_instance_id": cloud_sql.get("instanceId"),
+                "aws_role_arn": access_role.get("iamRoleId"),
+                "azure_app_client_id": azure.get("federatedApplicationClientId"),
+                "service_account_id": cloud_resource.get("serviceAccountId"),
+                "project_id": project_id,
+            },
+        )
+    return transformed
+
+
+@timeit
+def load_bigquery_connections(
+    neo4j_session: neo4j.Session,
+    data: list[dict],
+    project_id: str,
+    update_tag: int,
+) -> None:
+    """
+    Load transformed BigQuery connections into the graph.
+
+    Args:
+        neo4j_session: The Neo4j session used for the write.
+        data: Connection rows to load, shaped for ``GCPBigQueryConnectionSchema``.
+        project_id: The GCP project ID, passed to ``load`` as ``PROJECT_ID``.
+        update_tag: Passed to ``load`` as ``lastupdated``.
+    """
+    connection_schema = GCPBigQueryConnectionSchema()
+    load(
+        neo4j_session,
+        connection_schema,
+        data,
+        lastupdated=update_tag,
+        PROJECT_ID=project_id,
+    )
+
+
+@timeit
+def cleanup_bigquery_connections(
+    neo4j_session: neo4j.Session,
+    common_job_parameters: dict,
+) -> None:
+    """
+    Run the cleanup job derived from the BigQuery connection node schema.
+
+    Args:
+        neo4j_session: The Neo4j session the job runs against.
+        common_job_parameters: Parameters handed to
+            ``GraphJob.from_node_schema`` when building the job.
+    """
+    # NOTE(review): presumably this removes connection nodes that were not
+    # touched by the current sync; confirm against GraphJob.
+    cleanup_job = GraphJob.from_node_schema(
+        GCPBigQueryConnectionSchema(),
+        common_job_parameters,
+    )
+    cleanup_job.run(neo4j_session)
+
+
+@timeit
+def sync_bigquery_connections(
+    neo4j_session: neo4j.Session,
+    client: Resource,
+    project_id: str,
+    update_tag: int,
+    common_job_parameters: dict,
+    bigquery_client: Resource | None = None,
+) -> None:
+    """
+    Fetch, transform, load and clean up BigQuery connections for one project.
+
+    Load and cleanup only happen when ``get_bigquery_connections`` returns a
+    list; a ``None`` result ends the sync without touching the graph.
+
+    Args:
+        neo4j_session: The Neo4j session used for load and cleanup.
+        client: The bigqueryconnection v1 API client.
+        project_id: The GCP project ID being synced.
+        update_tag: Update tag forwarded to the load step.
+        common_job_parameters: Base parameters for the cleanup job; a copy
+            with ``PROJECT_ID`` set is what the cleanup step receives.
+        bigquery_client: Optional BigQuery v2 API client forwarded to
+            ``get_bigquery_connections``.
+    """
+    logger.info("Syncing BigQuery connections for project %s.", project_id)
+    raw_connections = get_bigquery_connections(client, project_id, bigquery_client)
+    if raw_connections is None:
+        return
+
+    load_bigquery_connections(
+        neo4j_session,
+        transform_connections(raw_connections, project_id),
+        project_id,
+        update_tag,
+    )
+
+    # Build a new dict so the caller's parameters are left unmodified.
+    scoped_job_params = {**common_job_parameters, "PROJECT_ID": project_id}
+    cleanup_bigquery_connections(neo4j_session, scoped_job_params)