import asyncio
import pandas as pd
from .._src import (
build_tablemage_analyzer,
StorageManager,
DataContainer,
CanvasQueue,
ToolingContext,
print_debug,
)
from .._src.agents_src.single_agent import SingleAgent
from .._src.agents_src.prompt.single_agent_system_prompt import DEFAULT_SYSTEM_PROMPT
from .._src.options import options
class ChatDA:
    """Chat Data Analyst.

    Class for interacting with the LLMs for data analysis on tabular data.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        df_test: pd.DataFrame | None = None,
        test_size: float = 0.2,
        split_seed: int = 42,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        memory_size: int = 3000,
        tool_rag: bool = True,
        tool_rag_top_k: int = 5,
        tool_rag_prompt_augment: bool = True,
        python_only: bool = False,
        tools_only: bool = False,
        multimodal: bool = False,
        verbose: bool = False,
    ):
        """Initializes the ChatDA object.

        Builds the Analyzer from the provided data, sets up the shared
        tooling context (data container, storage manager, canvas queue) and
        creates the underlying ``SingleAgent``.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to build the Analyzer for.

        df_test : pd.DataFrame | None
            The test DataFrame to use for the Analyzer. Defaults to None.

        test_size : float
            The size of the test set. Defaults to 0.2.

        split_seed : int
            The seed to use for the train-test split. Default is 42.

        system_prompt : str
            The system prompt to use for the LLM. Default is provided.

        memory_size : int
            The size of the memory to use. Token limit synonym.
            Default is 3000.

        tool_rag : bool
            If True, the RAG-based tooling is used. Default is True.

        tool_rag_top_k : int
            The top-k value to use for the RAG-based tooling. Default is 5.

        tool_rag_prompt_augment : bool
            If True, the RAG tooling prompts are augmented with history.
            Default is True.

        python_only : bool
            If True, only the Python environment is provided.
            Default is False.

        tools_only : bool
            If True, only the non-coding tools are provided.
            Otherwise, the Python environment is also provided.
            python_only and tools_only cannot be True at the same time;
            this constructor does not check that itself and forwards both
            flags unchanged to the agent. Default is False.

        multimodal : bool
            If True, multimodal LLM is used only for interpreting figures.
            Forwarded to the storage manager. Default is False.

        verbose : bool
            If True, prints LlamaIndex agent thoughts and tool outputs.
            Default is False.
        """
        # Build the Analyzer first and register it with a fresh container.
        analyzer = build_tablemage_analyzer(
            df,
            df_test=df_test,
            test_size=test_size,
            split_seed=split_seed,
        )
        self._data_container = DataContainer()
        self._data_container.set_analyzer(analyzer)
        print_debug(
            "Data container initialized with the Analyzer built from the "
            "provided DataFrame."
        )

        # Attribute name is kept for compatibility; it holds a StorageManager
        # created with the vectorstore switched off.
        self._vectorstore_manager = StorageManager(
            multimodal=multimodal, vectorstore=False
        )
        self._canvas_queue = CanvasQueue()
        self._context = ToolingContext(
            data_container=self._data_container,
            storage_manager=self._vectorstore_manager,
            canvas_queue=self._canvas_queue,
        )
        print_debug("IO initialized.")

        print_debug("Initializing the Agent.")
        self._single_agent = SingleAgent(
            llm=options.llm_build_function(),
            context=self._context,
            memory_size=memory_size,
            tool_rag_top_k=tool_rag_top_k,
            tool_rag=tool_rag,
            tool_rag_prompt_augment=tool_rag_prompt_augment,
            system_prompt=system_prompt,
            python_only=python_only,
            tools_only=tools_only,
            verbose=verbose,
        )
        print_debug(
            f"Agent initialized. Agent type: {self._single_agent.__class__.__name__}."
        )

    async def achat(self, message: str) -> str:
        """Async version of chat.

        Awaits the agent's ``chat`` coroutine and converts the result to a
        string.

        Parameters
        ----------
        message : str
            The message to send to the LLM.

        Returns
        -------
        str
            The response from the LLM.
        """
        response = await self._single_agent.chat(message)
        return str(response)

    def chat(self, message: str) -> str:
        """Interacts with the LLM to provide data analysis insights.

        Synchronous wrapper around ``achat``. When no event loop is running
        in the current thread, the coroutine is driven with
        ``asyncio.run``. When a loop is already running (e.g., in a Jupyter
        notebook), ``nest_asyncio`` is applied so that the running loop can
        be re-entered with ``run_until_complete``.

        Parameters
        ----------
        message : str
            The message to send to the LLM.

        Returns
        -------
        str
            The response from the LLM.
        """
        # get_running_loop() raises RuntimeError when no loop is running, and
        # otherwise returns the loop that is running, so no extra
        # is_running() check is needed.
        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None

        if running_loop is None:
            # Handled outside the except clause so that errors raised by the
            # agent are not chained to the RuntimeError above.
            return asyncio.run(self.achat(message))

        # Imported lazily: only needed when called from an async context.
        import nest_asyncio

        nest_asyncio.apply()
        return running_loop.run_until_complete(self.achat(message))

    def get_transcript(self) -> str:
        """Gets the transcript of the conversation.

        Returns
        -------
        str
            The transcript of the conversation.
        """
        # NOTE(review): reads the context through the agent's private
        # attribute; presumably the same object as self._context -- confirm
        # against SingleAgent before simplifying.
        return self._single_agent._context.get_transcript_as_str()