BigQuery MCP - Schema Discovery and Data Exploration
A common challenge when building data agents is that the agent does not know what tables and columns exist. This tutorial builds a BigQuery MCP server that exposes schema discovery tools so an ADK agent can explore the warehouse structure before writing queries.
MCP Server with Schema Discovery
# bq_explorer_server.py
import os
import re

from google.cloud import bigquery
from mcp.server.fastmcp import FastMCP
# MCP server instance; the name is what clients see when they connect.
mcp = FastMCP("bq-explorer")
# Fails fast with KeyError if GCP_PROJECT_ID is unset — deliberate, since
# nothing below works without a project.
client = bigquery.Client(project=os.environ["GCP_PROJECT_ID"])
# Dataset all tools operate on; defaults to "shopmax" when BQ_DATASET is unset.
DATASET = os.environ.get("BQ_DATASET", "shopmax")
@mcp.tool()
def list_tables() -> dict:
    """Return the id of every table in the configured dataset."""
    table_ids = [table.table_id for table in client.list_tables(DATASET)]
    return {"tables": table_ids}
@mcp.tool()
def describe_table(table_name: str) -> dict:
    """Return column names, types, modes, and descriptions for a table.

    Also reports the table's row count so the agent can gauge its size
    before sampling or querying it.
    """
    table = client.get_table(f"{DATASET}.{table_name}")
    schema_info = []
    for field in table.schema:
        schema_info.append(
            {
                "name": field.name,
                "type": field.field_type,
                "mode": field.mode,
                "description": field.description or "",
            }
        )
    return {
        "table": table_name,
        "num_rows": table.num_rows,
        "columns": schema_info,
    }
@mcp.tool()
def sample_rows(table_name: str, limit: int = 5) -> dict:
    """Return a few sample rows from a table to understand data shape.

    Args:
        table_name: Table to sample; must be a plain identifier
            (letters, digits, underscore, not starting with a digit).
        limit: Maximum rows to return; clamped to the range 1-100.

    Returns:
        A dict with a "rows" list, one dict per row.

    Raises:
        ValueError: If table_name is not a safe identifier.
    """
    # BigQuery cannot bind identifiers as query parameters, so the table
    # name must be interpolated into the SQL string. Since the name comes
    # from an LLM agent (untrusted), validate it first to prevent SQL
    # injection via a crafted "table name".
    if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", table_name):
        raise ValueError(f"Invalid table name: {table_name!r}")
    # Clamp the limit so a misbehaving agent cannot dump an entire table.
    limit = max(1, min(int(limit), 100))
    sql = f"SELECT * FROM `{DATASET}.{table_name}` LIMIT @limit"
    # The row limit IS a value, so it can (and should) be a bound parameter.
    job_config = bigquery.QueryJobConfig(
        query_parameters=[bigquery.ScalarQueryParameter("limit", "INT64", limit)]
    )
    rows = client.query(sql, job_config=job_config).result()
    return {"rows": [dict(row) for row in rows]}
@mcp.tool()
def column_stats(table_name: str, column_name: str) -> dict:
    """Return min, max, distinct count, and null count for a column.

    Args:
        table_name: Table to inspect; must be a plain identifier.
        column_name: Column to profile; must be a plain identifier.

    Returns:
        A dict with keys min_val, max_val, distinct_count, null_count.

    Raises:
        ValueError: If either name is not a safe identifier.
    """
    # Both names are interpolated into the SQL string (identifiers cannot
    # be query parameters in BigQuery), and both come from an untrusted
    # LLM agent — so reject anything that is not a bare identifier to
    # prevent SQL injection.
    for identifier in (table_name, column_name):
        if not re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", identifier):
            raise ValueError(f"Invalid identifier: {identifier!r}")
    sql = f"""
        SELECT
            MIN({column_name}) AS min_val,
            MAX({column_name}) AS max_val,
            COUNT(DISTINCT {column_name}) AS distinct_count,
            COUNTIF({column_name} IS NULL) AS null_count
        FROM `{DATASET}.{table_name}`
    """
    rows = list(client.query(sql).result())
    return dict(rows[0])
# Serve over stdio so an ADK agent (or any MCP client) can spawn this
# script as a subprocess and talk to it via stdin/stdout.
if __name__ == "__main__":
    mcp.run(transport="stdio")
Exploration Agent
# explorer_agent.py
import asyncio
from google.adk.agents import LlmAgent
from google.adk.tools.mcp_tool.mcp_toolset import MCPToolset, StdioServerParameters
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner
from google.genai import types
async def main():
    """Spawn the BigQuery explorer MCP server and run the exploration agent.

    Connects to bq_explorer_server.py over stdio, builds an LlmAgent with
    the discovered tools, runs one exploration prompt, and prints the
    agent's final response.
    """
    tools, exit_stack = await MCPToolset.from_server(
        connection_params=StdioServerParameters(
            command="python", args=["bq_explorer_server.py"]
        )
    )
    # From this point on the MCP server subprocess is alive; make sure it
    # is torn down even if agent construction or the run loop raises.
    try:
        agent = LlmAgent(
            model="gemini-2.0-flash",
            # ADK requires agent names to be valid identifiers; a name
            # with a space ("Data Explorer") is rejected at construction.
            name="data_explorer",
            instruction="""
You are a data exploration agent. When asked to explore a dataset:
1. Call list_tables to see all available tables.
2. Call describe_table on each table to understand the schema.
3. Call sample_rows to see example data.
4. Call column_stats on interesting columns to understand distributions.
5. Summarize your findings in a structured report.
""",
            tools=tools,
        )
        session_service = InMemorySessionService()
        await session_service.create_session(
            app_name="explorer", user_id="u1", session_id="s1"
        )
        runner = Runner(
            agent=agent, app_name="explorer", session_service=session_service
        )
        prompt = "Explore the shopmax dataset and give me a data dictionary with key statistics for each table."
        content = types.Content(role="user", parts=[types.Part(text=prompt)])
        async for event in runner.run_async(
            user_id="u1", session_id="s1", new_message=content
        ):
            if event.is_final_response():
                print(event.content.parts[0].text)
    finally:
        # Always close the toolset's exit stack so the MCP server
        # subprocess does not leak when an error occurs mid-run.
        await exit_stack.aclose()


asyncio.run(main())
MCP Resources for Static Schema
For schemas that rarely change, expose them as MCP resources instead of tools. This avoids repeated API calls and reduces BigQuery costs.
# Exposed as an MCP resource (not a tool): the agent reads it once at
# session start instead of paying for repeated BigQuery metadata calls.
@mcp.resource("schema://shopmax")
def shopmax_schema() -> str:
    """Static schema documentation for the shopmax dataset.

    Returns a plain-text summary listing each table and its columns.
    Must be updated by hand if the warehouse schema changes.
    """
    return """
orders: order_id, customer_id, city, product_category, amount_rs, order_date, status
customers: customer_id, name, email, city, joined_date
order_items: item_id, order_id, product_name, quantity, unit_price_rs
"""
When to Use Schema Discovery vs Static Schema
- Use the `list_tables` and `describe_table` tools when the schema evolves frequently
- Use MCP resources for stable schemas - the agent reads them once at session start
- Combine both: resources for core tables, discovery tools for newly added tables