From be96919feb3700830099f3fa711bfcf4bd6f63c3 Mon Sep 17 00:00:00 2001 From: Eduard van Valkenburg Date: Tue, 3 Dec 2024 11:28:24 +0100 Subject: [PATCH] Python: fix for file limit and some cleanup (#9855) ### Motivation and Context We got a report stating that there was still an old limit on the number of files supplied to the Azure Assistant API. This PR fixes that and also does some further cleanup of the code. ### Description ### Contribution Checklist - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone :smile: --------- Co-authored-by: Chris <66376200+crickman@users.noreply.github.com> --- .../decisions/0031-feature-branch-strategy.md | 2 +- dotnet/docs/MODELS.md | 38 +++---- .../open_ai/assistant_content_generation.py | 24 ++-- .../agents/open_ai/azure_assistant_agent.py | 12 +- .../agents/open_ai/function_action_result.py | 19 ++++ .../agents/open_ai/open_ai_assistant_agent.py | 12 +- .../agents/open_ai/open_ai_assistant_base.py | 105 +++++++++--------- 7 files changed, 118 insertions(+), 94 deletions(-) create mode 100644 python/semantic_kernel/agents/open_ai/function_action_result.py diff --git a/docs/decisions/0031-feature-branch-strategy.md b/docs/decisions/0031-feature-branch-strategy.md index 0c852d7bb021..e2085bbef244 100644 --- a/docs/decisions/0031-feature-branch-strategy.md +++ b/docs/decisions/0031-feature-branch-strategy.md @@ -96,7 +96,7 @@ Cons: | Windows Support | No | Yes | | Linux Support | Yes | Yes | | MacOS Support | Yes | Yes | -| Number of Models | [61](https://ollama.ai/library) +Any GGUF converted | 
[25](https://github.com/lmstudio-ai/model-catalog/tree/main/models) +Any GGUF Converted | +| Number of Models | [61](https://ollama.com/search) +Any GGUF converted | [25](https://github.com/lmstudio-ai/model-catalog/tree/main/models) +Any GGUF Converted | | Model Support | Ollama | LM Studio | | --------------- | ------ | --------- | diff --git a/dotnet/docs/MODELS.md b/dotnet/docs/MODELS.md index 2ba8eb967bfa..918e91cd5d47 100644 --- a/dotnet/docs/MODELS.md +++ b/dotnet/docs/MODELS.md @@ -8,7 +8,7 @@ In the core Semantic Kernel repo, we plan on supporting up to four deployment ty - Dedicated API endpoints (e.g., OpenAI's APIs, Mistral.AI, and Google Gemini) - Azure AI deployments via the [model catalog](https://learn.microsoft.com/en-us/azure/ai-studio/how-to/model-catalog) -- Local deployments via [Ollama](https://ollama.ai/library) +- Local deployments via [Ollama](https://ollama.ai/) - Hugging face deployment using the [Hugging Face inference API](https://huggingface.co/docs/api-inference/index) To support these different deployment types, we will follow a similar pattern to the Azure OpenAI and OpenAI connectors. Each connector uses the same underlying model and abstractions, but the connector constructors may take different parameters. For example, the Azure OpenAI connector expects an Azure endpoint and key, whereas the OpenAI connector expects an OpenAI organization ID and API key. @@ -23,25 +23,25 @@ Please note that not all of the model interfaces are defined yet. 
As part of con ### OpenAI -| Priority | Model | Status | Interface | Deployment type | GitHub issue | Developer | Reviewer | -| -------- | ----------------------- | ----------- | ------------------------------ | --------------- | ------------ | ----------- | -------- | -| P0 | GPT-3.5-turbo | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | -| P0 | GPT-3.5-turbo | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | -| P0 | GPT-4 | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | -| P0 | GPT-4 | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | -| P0 | GPT-4v | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | -| P0 | GPT-4v | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | -| P0 | text-embedding-ada-002 | Preview | `IEmbeddingGeneration` | OpenAI API | N/A | N/A | N/A | -| P0 | text-embedding-ada-002 | Preview | `IEmbeddingGeneration` | Azure AI | N/A | N/A | N/A | -| P0 | DALL·E 3 | Preview | `ITextToImage` | OpenAI API | N/A | N/A | N/A | -| P0 | DALL·E 3 | Preview | `ITextToImage` | Azure AI | N/A | N/A | N/A | -| P0 | Text-to-speech | Complete | `ITextToSpeech` | OpenAI API | TBD | dmytrostruk | TBD | -| P0 | Speech-to-text | Complete | `ISpeechRecognition` | OpenAI API | TBD | dmytrostruk | TBD | -| P1 | openai-whisper-large-v3 | Not started | `ISpeechRecognition` | Azure AI | TBD | TBD | TBD | -| P1 | openai-whisper-large-v3 | Not started | `ISpeechRecognition` | Hugging Face | TBD | TBD | TBD | +| Priority | Model | Status | Interface | Deployment type | GitHub issue | Developer | Reviewer | +| -------- | ----------------------- | ----------- | ------------------------------ | --------------- | ------------ | ------------ | ----------- | +| P0 | GPT-3.5-turbo | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | +| P0 | GPT-3.5-turbo | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | +| P0 | GPT-4 | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | +| P0 | 
GPT-4 | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | +| P0 | GPT-4v | Complete | `IChatCompletion` | OpenAI API | N/A | N/A | N/A | +| P0 | GPT-4v | Complete | `IChatCompletion` | Azure AI | N/A | N/A | N/A | +| P0 | text-embedding-ada-002 | Preview | `IEmbeddingGeneration` | OpenAI API | N/A | N/A | N/A | +| P0 | text-embedding-ada-002 | Preview | `IEmbeddingGeneration` | Azure AI | N/A | N/A | N/A | +| P0 | DALL·E 3 | Preview | `ITextToImage` | OpenAI API | N/A | N/A | N/A | +| P0 | DALL·E 3 | Preview | `ITextToImage` | Azure AI | N/A | N/A | N/A | +| P0 | Text-to-speech | Complete | `ITextToSpeech` | OpenAI API | TBD | dmytrostruk | TBD | +| P0 | Speech-to-text | Complete | `ISpeechRecognition` | OpenAI API | TBD | dmytrostruk | TBD | +| P1 | openai-whisper-large-v3 | Not started | `ISpeechRecognition` | Azure AI | TBD | TBD | TBD | +| P1 | openai-whisper-large-v3 | Not started | `ISpeechRecognition` | Hugging Face | TBD | TBD | TBD | | P2 | Moderation | In Progress | `ITextClassification` | OpenAI API | #5062 | Krzysztof318 | MarkWallace | -| P2 | clip-vit-base-patch32 | Not started | `IZeroShotImageClassification` | Azure AI | TBD | TBD | TBD | -| P2 | clip-vit-base-patch32 | Not started | `IZeroShotImageClassification` | Hugging Face | TBD | TBD | TBD | +| P2 | clip-vit-base-patch32 | Not started | `IZeroShotImageClassification` | Azure AI | TBD | TBD | TBD | +| P2 | clip-vit-base-patch32 | Not started | `IZeroShotImageClassification` | Hugging Face | TBD | TBD | TBD | ### Microsoft diff --git a/python/semantic_kernel/agents/open_ai/assistant_content_generation.py b/python/semantic_kernel/agents/open_ai/assistant_content_generation.py index c2a44964d7ef..872978adbdd4 100644 --- a/python/semantic_kernel/agents/open_ai/assistant_content_generation.py +++ b/python/semantic_kernel/agents/open_ai/assistant_content_generation.py @@ -85,17 +85,19 @@ def get_message_contents(message: "ChatMessageContent") -> list[dict[str, Any]]: """ contents: 
list[dict[str, Any]] = [] for content in message.items: - if isinstance(content, TextContent): - contents.append({"type": "text", "text": content.text}) - elif isinstance(content, ImageContent) and content.uri: - contents.append(content.to_dict()) - elif isinstance(content, FileReferenceContent): - contents.append({ - "type": "image_file", - "image_file": {"file_id": content.file_id}, - }) - elif isinstance(content, FunctionResultContent): - contents.append({"type": "text", "text": content.result}) + match content: + case TextContent(): + contents.append({"type": "text", "text": content.text}) + case ImageContent(): + if content.uri: + contents.append(content.to_dict()) + case FileReferenceContent(): + contents.append({ + "type": "image_file", + "image_file": {"file_id": content.file_id}, + }) + case FunctionResultContent(): + contents.append({"type": "text", "text": content.result}) return contents diff --git a/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py b/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py index 8cc560b6371a..6ed8f9e98e84 100644 --- a/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py +++ b/python/semantic_kernel/agents/open_ai/azure_assistant_agent.py @@ -54,11 +54,11 @@ def __init__( enable_code_interpreter: bool | None = None, enable_file_search: bool | None = None, enable_json_response: bool | None = None, - file_ids: list[str] | None = [], + file_ids: list[str] | None = None, temperature: float | None = None, top_p: float | None = None, vector_store_id: str | None = None, - metadata: dict[str, Any] | None = {}, + metadata: dict[str, Any] | None = None, max_completion_tokens: int | None = None, max_prompt_tokens: int | None = None, parallel_tool_calls_enabled: bool | None = True, @@ -150,11 +150,11 @@ def __init__( "enable_code_interpreter": enable_code_interpreter, "enable_file_search": enable_file_search, "enable_json_response": enable_json_response, - "file_ids": file_ids, + "file_ids": file_ids or 
[], "temperature": temperature, "top_p": top_p, "vector_store_id": vector_store_id, - "metadata": metadata, + "metadata": metadata or {}, "max_completion_tokens": max_completion_tokens, "max_prompt_tokens": max_prompt_tokens, "parallel_tool_calls_enabled": parallel_tool_calls_enabled, @@ -199,7 +199,7 @@ async def create( temperature: float | None = None, top_p: float | None = None, vector_store_id: str | None = None, - metadata: dict[str, Any] | None = {}, + metadata: dict[str, Any] | None = None, max_completion_tokens: int | None = None, max_prompt_tokens: int | None = None, parallel_tool_calls_enabled: bool | None = True, @@ -268,7 +268,7 @@ async def create( temperature=temperature, top_p=top_p, vector_store_id=vector_store_id, - metadata=metadata, + metadata=metadata or {}, max_completion_tokens=max_completion_tokens, max_prompt_tokens=max_prompt_tokens, parallel_tool_calls_enabled=parallel_tool_calls_enabled, diff --git a/python/semantic_kernel/agents/open_ai/function_action_result.py b/python/semantic_kernel/agents/open_ai/function_action_result.py new file mode 100644 index 000000000000..48f6eb13bf4e --- /dev/null +++ b/python/semantic_kernel/agents/open_ai/function_action_result.py @@ -0,0 +1,19 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +import logging +from dataclasses import dataclass + +from semantic_kernel.contents.chat_message_content import ChatMessageContent +from semantic_kernel.utils.experimental_decorator import experimental_class + +logger: logging.Logger = logging.getLogger(__name__) + + +@experimental_class +@dataclass +class FunctionActionResult: + """Function Action Result.""" + + function_call_content: ChatMessageContent | None + function_result_content: ChatMessageContent | None + tool_outputs: list[dict[str, str]] | None diff --git a/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py b/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py index 7a751e1c1018..11f2d50f4d95 100644 --- a/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py +++ b/python/semantic_kernel/agents/open_ai/open_ai_assistant_agent.py @@ -50,11 +50,11 @@ def __init__( enable_code_interpreter: bool | None = None, enable_file_search: bool | None = None, enable_json_response: bool | None = None, - code_interpreter_file_ids: list[str] | None = [], + code_interpreter_file_ids: list[str] | None = None, temperature: float | None = None, top_p: float | None = None, vector_store_id: str | None = None, - metadata: dict[str, Any] | None = {}, + metadata: dict[str, Any] | None = None, max_completion_tokens: int | None = None, max_prompt_tokens: int | None = None, parallel_tool_calls_enabled: bool | None = True, @@ -125,11 +125,11 @@ def __init__( "enable_code_interpreter": enable_code_interpreter, "enable_file_search": enable_file_search, "enable_json_response": enable_json_response, - "code_interpreter_file_ids": code_interpreter_file_ids, + "code_interpreter_file_ids": code_interpreter_file_ids or [], "temperature": temperature, "top_p": top_p, "vector_store_id": vector_store_id, - "metadata": metadata, + "metadata": metadata or {}, "max_completion_tokens": max_completion_tokens, "max_prompt_tokens": max_prompt_tokens, "parallel_tool_calls_enabled": parallel_tool_calls_enabled, @@ 
-173,7 +173,7 @@ async def create( temperature: float | None = None, top_p: float | None = None, vector_store_id: str | None = None, - metadata: dict[str, Any] | None = {}, + metadata: dict[str, Any] | None = None, max_completion_tokens: int | None = None, max_prompt_tokens: int | None = None, parallel_tool_calls_enabled: bool | None = True, @@ -236,7 +236,7 @@ async def create( temperature=temperature, top_p=top_p, vector_store_id=vector_store_id, - metadata=metadata, + metadata=metadata or {}, max_completion_tokens=max_completion_tokens, max_prompt_tokens=max_prompt_tokens, parallel_tool_calls_enabled=parallel_tool_calls_enabled, diff --git a/python/semantic_kernel/agents/open_ai/open_ai_assistant_base.py b/python/semantic_kernel/agents/open_ai/open_ai_assistant_base.py index 49673feda2b1..9b9f712c0693 100644 --- a/python/semantic_kernel/agents/open_ai/open_ai_assistant_base.py +++ b/python/semantic_kernel/agents/open_ai/open_ai_assistant_base.py @@ -4,8 +4,7 @@ import json import logging from collections.abc import AsyncIterable, Iterable -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, ClassVar, Literal +from typing import TYPE_CHECKING, Annotated, Any, ClassVar, Literal from openai import AsyncOpenAI from openai.resources.beta.assistants import Assistant @@ -30,15 +29,12 @@ get_function_call_contents, get_message_contents, ) +from semantic_kernel.agents.open_ai.function_action_result import FunctionActionResult from semantic_kernel.agents.open_ai.run_polling_options import RunPollingOptions from semantic_kernel.connectors.ai.function_calling_utils import ( kernel_function_metadata_to_function_call_format, merge_function_results, ) -from semantic_kernel.contents.chat_history import ChatHistory -from semantic_kernel.contents.chat_message_content import ChatMessageContent -from semantic_kernel.contents.function_call_content import FunctionCallContent -from semantic_kernel.contents.function_result_content import FunctionResultContent from 
semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.agent_exceptions import ( AgentExecutionException, @@ -49,21 +45,14 @@ from semantic_kernel.utils.experimental_decorator import experimental_class if TYPE_CHECKING: + from semantic_kernel.contents.chat_history import ChatHistory + from semantic_kernel.contents.chat_message_content import ChatMessageContent + from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.kernel import Kernel logger: logging.Logger = logging.getLogger(__name__) -@experimental_class -@dataclass -class FunctionActionResult: - """Function Action Result.""" - - function_call_content: ChatMessageContent | None - function_result_content: ChatMessageContent | None - tool_outputs: list[dict[str, str]] | None - - @experimental_class class OpenAIAssistantBase(Agent): """OpenAI Assistant Base class. @@ -77,19 +66,25 @@ class OpenAIAssistantBase(Agent): client: AsyncOpenAI assistant: Assistant | None = None polling_options: RunPollingOptions = Field(default_factory=RunPollingOptions) - enable_code_interpreter: bool | None = Field(False) - enable_file_search: bool | None = Field(False) - enable_json_response: bool | None = Field(False) - code_interpreter_file_ids: list[str] | None = Field(default_factory=list, max_length=20) # type: ignore - file_search_file_ids: list[str] | None = Field(default_factory=list, max_length=20) # type: ignore - temperature: float | None = Field(None) - top_p: float | None = Field(None) + enable_code_interpreter: bool | None = False + enable_file_search: bool | None = False + enable_json_response: bool | None = False + code_interpreter_file_ids: Annotated[list[str] | None, Field(max_length=20)] = Field(default_factory=list) # type: ignore + file_search_file_ids: Annotated[ + list[str] | None, + Field( + description="There is a limit of 10000 files when using Azure Assistants API, " + "the OpenAI docs state no limit, hence this is not 
checked." + ), + ] = Field(default_factory=list) # type: ignore + temperature: float | None = None + top_p: float | None = None vector_store_id: str | None = None - metadata: dict[str, Any] | None = Field(default_factory=dict, max_length=16) # type: ignore - max_completion_tokens: int | None = Field(None) - max_prompt_tokens: int | None = Field(None) - parallel_tool_calls_enabled: bool | None = Field(True) - truncation_message_count: int | None = Field(None) + metadata: Annotated[dict[str, Any] | None, Field(max_length=20)] = Field(default_factory=dict) # type: ignore + max_completion_tokens: int | None = None + max_prompt_tokens: int | None = None + parallel_tool_calls_enabled: bool | None = True + truncation_message_count: int | None = None allowed_message_roles: ClassVar[list[str]] = [AuthorRole.USER, AuthorRole.ASSISTANT] polling_status: ClassVar[list[str]] = ["queued", "in_progress", "cancelling"] @@ -115,11 +110,11 @@ def __init__( enable_code_interpreter: bool | None = None, enable_file_search: bool | None = None, enable_json_response: bool | None = None, - code_interpreter_file_ids: list[str] | None = [], + code_interpreter_file_ids: list[str] | None = None, temperature: float | None = None, top_p: float | None = None, vector_store_id: str | None = None, - metadata: dict[str, Any] | None = {}, + metadata: dict[str, Any] | None = None, max_completion_tokens: int | None = None, max_prompt_tokens: int | None = None, parallel_tool_calls_enabled: bool | None = True, @@ -163,11 +158,11 @@ def __init__( "enable_code_interpreter": enable_code_interpreter, "enable_file_search": enable_file_search, "enable_json_response": enable_json_response, - "code_interpreter_file_ids": code_interpreter_file_ids, + "code_interpreter_file_ids": code_interpreter_file_ids or [], "temperature": temperature, "top_p": top_p, "vector_store_id": vector_store_id, - "metadata": metadata, + "metadata": metadata or {}, "max_completion_tokens": max_completion_tokens, "max_prompt_tokens": 
max_prompt_tokens, "parallel_tool_calls_enabled": parallel_tool_calls_enabled, @@ -195,7 +190,7 @@ async def create_assistant( code_interpreter_file_ids: list[str] | None = None, enable_file_search: bool | None = None, vector_store_id: str | None = None, - metadata: dict[str, str] | None = {}, + metadata: dict[str, str] | None = None, **kwargs: Any, ) -> "Assistant": """Create the assistant. @@ -209,7 +204,7 @@ async def create_assistant( enable_file_search: Enable file search. Defaults to None. (optional) code_interpreter_file_ids: The file ids. Defaults to None. (optional) vector_store_id: The vector store id. Defaults to None. (optional) - metadata: The metadata. Defaults to {}. (optional) + metadata: The metadata. Defaults to None. (optional) kwargs: Extra keyword arguments. Returns: @@ -304,7 +299,7 @@ async def create_assistant( return self.assistant - async def modify_assistant(self, assistant_id: str, **kwargs: Any) -> Assistant: + async def modify_assistant(self, assistant_id: str, **kwargs: Any) -> "Assistant": """Modify the assistant. Args: @@ -429,7 +424,7 @@ async def create_thread( self, *, code_interpreter_file_ids: list[str] | None = [], - messages: list[ChatMessageContent] | None = [], + messages: list["ChatMessageContent"] | None = [], vector_store_id: str | None = None, metadata: dict[str, str] = {}, ) -> str: @@ -494,7 +489,7 @@ async def delete(self) -> bool: self._is_deleted = True return self._is_deleted - async def add_chat_message(self, thread_id: str, message: ChatMessageContent) -> "Message": + async def add_chat_message(self, thread_id: str, message: "ChatMessageContent") -> "Message": """Add a chat message. 
Args: @@ -506,7 +501,7 @@ async def add_chat_message(self, thread_id: str, message: ChatMessageContent) -> """ return await create_chat_message(self.client, thread_id, message, self.allowed_message_roles) - async def get_thread_messages(self, thread_id: str) -> AsyncIterable[ChatMessageContent]: + async def get_thread_messages(self, thread_id: str) -> AsyncIterable["ChatMessageContent"]: """Get the messages for the specified thread. Args: @@ -527,7 +522,7 @@ async def get_thread_messages(self, thread_id: str) -> AsyncIterable[ChatMessage assistant_name = agent_names.get(message.assistant_id) if message.assistant_id else message.assistant_id assistant_name = assistant_name or message.assistant_id - content: ChatMessageContent = generate_message_content(str(assistant_name), message) + content: "ChatMessageContent" = generate_message_content(str(assistant_name), message) if len(content.items) > 0: yield content @@ -617,7 +612,7 @@ async def invoke( top_p: float | None = None, metadata: dict[str, str] | None = None, **kwargs: Any, - ) -> AsyncIterable[ChatMessageContent]: + ) -> AsyncIterable["ChatMessageContent"]: """Invoke the chat assistant. The supplied arguments will take precedence over the specified assistant level attributes. @@ -674,7 +669,7 @@ async def _invoke_internal( top_p: float | None = None, metadata: dict[str, str] | None = None, **kwargs: Any, - ) -> AsyncIterable[tuple[bool, ChatMessageContent]]: + ) -> AsyncIterable[tuple[bool, "ChatMessageContent"]]: """Internal invoke method. The supplied arguments will take precedence over the specified assistant level attributes. 
@@ -736,7 +731,7 @@ async def _invoke_internal( ) processed_step_ids = set() - function_steps: dict[str, FunctionCallContent] = {} + function_steps: dict[str, "FunctionCallContent"] = {} while run.status != "completed": run = await self._poll_run_status(run=run, thread_id=thread_id) @@ -756,6 +751,8 @@ async def _invoke_internal( if fccs: yield False, generate_function_call_content(agent_name=self.name, fccs=fccs) + from semantic_kernel.contents.chat_history import ChatHistory + chat_history = ChatHistory() _ = await self._invoke_function_calls(fccs=fccs, chat_history=chat_history) @@ -779,7 +776,7 @@ async def _invoke_internal( assert hasattr(completed_step.step_details, "tool_calls") # nosec for tool_call in completed_step.step_details.tool_calls: is_visible = False - content: ChatMessageContent | None = None + content: "ChatMessageContent | None" = None if tool_call.type == "code_interpreter": content = generate_code_interpreter_content( self.name, @@ -812,7 +809,7 @@ async def invoke_stream( self, thread_id: str, *, - messages: list[ChatMessageContent] | None = None, + messages: list["ChatMessageContent"] | None = None, ai_model_id: str | None = None, enable_code_interpreter: bool | None = False, enable_file_search: bool | None = False, @@ -825,7 +822,7 @@ async def invoke_stream( top_p: float | None = None, metadata: dict[str, str] | None = None, **kwargs: Any, - ) -> AsyncIterable[ChatMessageContent]: + ) -> AsyncIterable["ChatMessageContent"]: """Invoke the chat assistant with streaming.""" async for content in self._invoke_internal_stream( thread_id=thread_id, @@ -849,7 +846,7 @@ async def _invoke_internal_stream( self, thread_id: str, *, - messages: list[ChatMessageContent] | None = None, + messages: list["ChatMessageContent"] | None = None, ai_model_id: str | None = None, enable_code_interpreter: bool | None = False, enable_file_search: bool | None = False, @@ -862,7 +859,7 @@ async def _invoke_internal_stream( top_p: float | None = None, metadata: 
dict[str, str] | None = None, **kwargs: Any, - ) -> AsyncIterable[ChatMessageContent]: + ) -> AsyncIterable["ChatMessageContent"]: """Internal invoke method with streaming.""" if not self.assistant: raise AgentInitializationException("The assistant has not been created.") @@ -901,7 +898,7 @@ async def _invoke_internal_stream( **run_options, ) - function_steps: dict[str, FunctionCallContent] = {} + function_steps: dict[str, "FunctionCallContent"] = {} active_messages: dict[str, RunStep] = {} while True: @@ -991,12 +988,14 @@ async def _invoke_internal_stream( break async def _handle_streaming_requires_action( - self, run: Run, function_steps: dict[str, FunctionCallContent] + self, run: Run, function_steps: dict[str, "FunctionCallContent"] ) -> FunctionActionResult | None: fccs = get_function_call_contents(run, function_steps) if fccs: function_call_content = generate_function_call_content(agent_name=self.name, fccs=fccs) + from semantic_kernel.contents.chat_history import ChatHistory + chat_history = ChatHistory() _ = await self._invoke_function_calls(fccs=fccs, chat_history=chat_history) @@ -1201,7 +1200,7 @@ def _get_tools(self) -> list[dict[str, str]]: return tools - async def _invoke_function_calls(self, fccs: list[FunctionCallContent], chat_history: ChatHistory) -> list[Any]: + async def _invoke_function_calls(self, fccs: list["FunctionCallContent"], chat_history: "ChatHistory") -> list[Any]: """Invoke function calls and store results in chat history. Args: @@ -1217,7 +1216,9 @@ async def _invoke_function_calls(self, fccs: list[FunctionCallContent], chat_his ] return await asyncio.gather(*tasks) - def _format_tool_outputs(self, fccs: list[FunctionCallContent], chat_history: ChatHistory) -> list[dict[str, str]]: + def _format_tool_outputs( + self, fccs: list["FunctionCallContent"], chat_history: "ChatHistory" + ) -> list[dict[str, str]]: """Format tool outputs from chat history for submission. 
Args: @@ -1227,6 +1228,8 @@ def _format_tool_outputs(self, fccs: list[FunctionCallContent], chat_history: Ch Returns: The formatted tool outputs as a list of dictionaries. """ + from semantic_kernel.contents.function_result_content import FunctionResultContent + tool_call_lookup = { tool_call.id: tool_call for message in chat_history.messages