# Source code for tablemage.agents.api.chatda

import asyncio
import pandas as pd
from .._src import (
    build_tablemage_analyzer,
    StorageManager,
    DataContainer,
    CanvasQueue,
    ToolingContext,
    print_debug,
)
from .._src.agents_src.single_agent import SingleAgent
from .._src.agents_src.prompt.single_agent_system_prompt import DEFAULT_SYSTEM_PROMPT
from .._src.options import options



# [docs]
class ChatDA:
    """Chat Data Analyst.

    Class for interacting with the LLMs for data analysis on tabular data.

    An instance is built from a DataFrame (see ``__init__``) and wraps a
    single agent. Use ``chat`` for synchronous calls, ``achat`` from async
    code, and ``get_transcript`` to retrieve the conversation so far.
    """


# [docs]
    def __init__(
        self,
        df: pd.DataFrame,
        df_test: pd.DataFrame | None = None,
        test_size: float = 0.2,
        split_seed: int = 42,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        memory_size: int = 3000,
        tool_rag: bool = True,
        tool_rag_top_k: int = 5,
        tool_rag_prompt_augment: bool = True,
        python_only: bool = False,
        tools_only: bool = False,
        multimodal: bool = False,
        verbose: bool = False,
    ):
        """Initializes the ChatDA object.

        Parameters
        ----------
        df : pd.DataFrame
            The DataFrame to build the Analyzer for.

        df_test : pd.DataFrame | None
            The test DataFrame to use for the Analyzer. Defaults to None.

        test_size : float
            The size of the test set. Defaults to 0.2.

        split_seed : int
            The seed to use for the train-test split. Default is 42.

        system_prompt : str
            The system prompt to use for the LLM. Default is provided.

        memory_size : int
            The size of the memory to use. Token limit synonym. Default is 3000.

        tool_rag : bool
            If True, the RAG-based tooling is used. Default is True.

        tool_rag_top_k : int
            The top-k value to use for the RAG-based tooling. Default is 5.

        tool_rag_prompt_augment : bool
            If True, the RAG tooling prompts are augmented with history.
            Default is True.

        python_only : bool
            If True, only the Python environment is provided. \
            Default is False.

        tools_only : bool
            If True, only the non-coding tools are provided. \
            Otherwise, the Python environment is also provided. \
            python_only and tools_only cannot be True at the same time. \
            Default is False.

        multimodal : bool
            If True, multimodal LLM is used only for interpreting figures. \
            Default is False.

        verbose : bool
            If True, prints LlamaIndex agent thoughts and tool outputs. Default is False.

        Raises
        ------
        ValueError
            If both python_only and tools_only are True.
        """
        # Fail fast on the documented mutual exclusion, before the Analyzer
        # and the LLM are built.
        if python_only and tools_only:
            raise ValueError(
                "python_only and tools_only cannot both be True. "
                "Set at most one of them."
            )

        # The data container holds the Analyzer built from the input data.
        self._data_container = DataContainer()
        self._data_container.set_analyzer(
            build_tablemage_analyzer(
                df,
                df_test=df_test,
                test_size=test_size,
                split_seed=split_seed,
            )
        )
        print_debug(
            "Data container initialized with the Analyzer built from the "
            "provided DataFrame."
        )

        # Shared IO objects handed to the agent through a single context.
        self._vectorstore_manager = StorageManager(
            multimodal=multimodal, vectorstore=False
        )
        self._canvas_queue = CanvasQueue()
        self._context = ToolingContext(
            data_container=self._data_container,
            storage_manager=self._vectorstore_manager,
            canvas_queue=self._canvas_queue,
        )
        print_debug("IO initialized.")

        print_debug("Initializing the Agent.")
        self._single_agent = SingleAgent(
            # The LLM comes from the package-level options, not a parameter.
            llm=options.llm_build_function(),
            context=self._context,
            memory_size=memory_size,
            tool_rag_top_k=tool_rag_top_k,
            tool_rag=tool_rag,
            tool_rag_prompt_augment=tool_rag_prompt_augment,
            system_prompt=system_prompt,
            python_only=python_only,
            tools_only=tools_only,
            verbose=verbose,
        )
        print_debug(
            f"Agent initialized. Agent type: {self._single_agent.__class__.__name__}."
        )


    async def achat(self, message: str) -> str:
        """Async version of chat. Interacts with the LLM to provide data analysis insights.

        Parameters
        ----------
        message : str
            The message to send to the LLM.

        Returns
        -------
        str
            The response from the LLM.
        """
        response = await self._single_agent.chat(message)
        return str(response)


# [docs]
    def chat(self, message: str) -> str:
        """Interacts with the LLM to provide data analysis insights.

        Parameters
        ----------
        message : str
            The message to send to the LLM.

        Returns
        -------
        str
            The response from the LLM.
        """
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None

        if loop is not None and loop.is_running():
            # We're in an async context (e.g., Jupyter notebook)
            import nest_asyncio

            nest_asyncio.apply()
            return asyncio.get_event_loop().run_until_complete(self.achat(message))
        else:
            # No event loop running, use asyncio.run()
            return asyncio.run(self.achat(message))


    def get_transcript(self) -> str:
        """Gets the transcript of the conversation.

        Returns
        -------
        str
            The transcript of the conversation.
        """
        return self._single_agent._context.get_transcript_as_str()