tl  tr
  Home | Tutorials | Articles | Videos | Products | Tools | Search
Interviews | Open Source | Tag Cloud | Follow Us | Bookmark | Contact   
 Agentic AI > MCP Protocol > BigQuery MCP - Schema Discovery and Data Exploration

BigQuery MCP - Schema Discovery and Data Exploration

Author: Venkata Sudhakar

BigQuery MCP - Schema Discovery and Data Exploration

A common challenge when building data agents is that the agent does not know what tables and columns exist. This tutorial builds a BigQuery MCP server that exposes schema discovery tools so an ADK agent can explore the warehouse structure before writing queries.

MCP Server with Schema Discovery

# bq_explorer_server.py
import os
import re

from google.cloud import bigquery
from mcp.server.fastmcp import FastMCP

# MCP server exposing BigQuery schema-discovery tools over stdio.
mcp = FastMCP("bq-explorer")
# Fails fast (KeyError) when GCP_PROJECT_ID is unset — the server is unusable
# without a project, so an early crash beats a confusing later failure.
client = bigquery.Client(project=os.environ["GCP_PROJECT_ID"])
# Dataset to explore; BQ_DATASET overrides the "shopmax" default.
DATASET = os.environ.get("BQ_DATASET", "shopmax")

@mcp.tool()
def list_tables() -> dict:
    """Return the IDs of every table in the configured dataset."""
    return {"tables": [table.table_id for table in client.list_tables(DATASET)]}

@mcp.tool()
def describe_table(table_name: str) -> dict:
    """Describe one table: column names, types, modes, and row count.

    Looks the table up inside the configured DATASET and reports each
    schema field plus the table's total row count.
    """
    table = client.get_table(f"{DATASET}.{table_name}")
    schema_info = []
    for field in table.schema:
        schema_info.append({
            "name": field.name,
            "type": field.field_type,
            "mode": field.mode,
            "description": field.description or "",
        })
    return {
        "table": table_name,
        "num_rows": table.num_rows,
        "columns": schema_info,
    }

@mcp.tool()
def sample_rows(table_name: str, limit: int = 5) -> dict:
    """Return a few sample rows from a table to understand data shape.

    Args:
        table_name: Table inside DATASET to sample. Must be a plain
            identifier; it is validated before interpolation because
            BigQuery cannot bind identifiers as query parameters.
        limit: Maximum number of rows to return (default 5).

    Returns:
        {"rows": [...]} with each row as a dict.

    Raises:
        ValueError: If table_name is not a valid identifier — prevents
            SQL injection through the f-string below.
    """
    # Identifiers cannot be query parameters, so whitelist the characters
    # instead of trusting the (LLM-supplied) table name.
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", table_name):
        raise ValueError(f"Invalid table name: {table_name!r}")
    sql = f"SELECT * FROM `{DATASET}.{table_name}` LIMIT @limit"
    # The row count CAN be parameterized; doing so also enforces INT64 typing.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[bigquery.ScalarQueryParameter("limit", "INT64", limit)]
    )
    rows = client.query(sql, job_config=job_config).result()
    return {"rows": [dict(r) for r in rows]}

@mcp.tool()
def column_stats(table_name: str, column_name: str) -> dict:
    """Return min, max, distinct count, and null count for one column.

    Args:
        table_name: Table inside DATASET. Must be a plain identifier.
        column_name: Column to profile. Must be a plain identifier.

    Returns:
        Dict with keys min_val, max_val, distinct_count, null_count.

    Raises:
        ValueError: If either name is not a valid identifier — both are
            interpolated into the SQL text (BigQuery cannot bind
            identifiers as query parameters), so unvalidated input would
            be an SQL-injection vector.
    """
    # Whitelist-validate every interpolated identifier.
    for ident in (table_name, column_name):
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", ident):
            raise ValueError(f"Invalid identifier: {ident!r}")
    sql = f"""
        SELECT
            MIN({column_name}) AS min_val,
            MAX({column_name}) AS max_val,
            COUNT(DISTINCT {column_name}) AS distinct_count,
            COUNTIF({column_name} IS NULL) AS null_count
        FROM `{DATASET}.{table_name}`
    """
    rows = list(client.query(sql).result())
    return dict(rows[0])

# Run as a stdio MCP server so a local agent can spawn it as a subprocess.
if __name__ == "__main__":
    mcp.run(transport="stdio")

Exploration Agent

# explorer_agent.py
import asyncio
from google.adk.agents import LlmAgent
from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset, StdioServerParameters
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types

async def main():
    """Spawn the BigQuery MCP server, run the exploration agent, print its report."""
    tools, exit_stack = await MCPToolset.from_server(
        connection_params=StdioServerParameters(
            command="python", args=["bq_explorer_server.py"]
        )
    )

    # Everything after acquiring the toolset runs under try/finally: without
    # it, any exception during agent setup or the run would skip
    # exit_stack.aclose() and leak the MCP server subprocess.
    try:
        agent = LlmAgent(
            model="gemini-2.0-flash",
            # ADK agent names must be valid identifiers — "Data Explorer"
            # (with a space) fails model validation.
            name="data_explorer",
            instruction="""
You are a data exploration agent. When asked to explore a dataset:
1. Call list_tables to see all available tables.
2. Call describe_table on each table to understand the schema.
3. Call sample_rows to see example data.
4. Call column_stats on interesting columns to understand distributions.
5. Summarize your findings in a structured report.
""",
            tools=tools,
        )

        session_service = InMemorySessionService()
        await session_service.create_session(app_name="explorer", user_id="u1", session_id="s1")
        runner = Runner(agent=agent, app_name="explorer", session_service=session_service)

        prompt = "Explore the shopmax dataset and give me a data dictionary with key statistics for each table."
        content = types.Content(role="user", parts=[types.Part(text=prompt)])

        # Stream events; only the final response carries the finished report.
        async for event in runner.run_async(user_id="u1", session_id="s1", new_message=content):
            if event.is_final_response():
                print(event.content.parts[0].text)
    finally:
        await exit_stack.aclose()

asyncio.run(main())

MCP Resources for Static Schema

For schemas that rarely change, expose them as MCP resources instead of tools. This avoids repeated API calls and reduces BigQuery costs.

@mcp.resource("schema://shopmax")
def shopmax_schema() -> str:
    """Serve a hand-maintained schema snapshot of the shopmax dataset.

    Published as an MCP resource (not a tool) so the agent can read it
    without triggering BigQuery API calls.
    """
    schema_doc = """
    orders: order_id, customer_id, city, product_category, amount_rs, order_date, status
    customers: customer_id, name, email, city, joined_date
    order_items: item_id, order_id, product_name, quantity, unit_price_rs
    """
    return schema_doc

When to Use Schema Discovery vs Static Schema

  • Use list_tables and describe_table tools when the schema evolves frequently
  • Use MCP resources for stable schemas — the agent reads them once at session start
  • Combine both: resources for core tables, discovery tools for newly added tables

 
  


  
bl  br