1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -39,6 +39,7 @@ dependencies = [
"cryptography >=44.0.0,<45.0.0",
"semgrep == 1.113.0",
"email-validator >=2.2.0,<3.0.0",
"pydantic >= 2.11.5,<2.12.0",
Reviewer comment (Member):

Is pydantic currently used? I can see it being used for the `T` type in openai_client.py, but I do not see it used anywhere else. The src/macaron/ai/README.md states it is passed as an argument to `invoke`, but I cannot find anywhere that this is actually done.

]
keywords = []
# https://pypi.org/classifiers/
50 changes: 50 additions & 0 deletions src/macaron/ai/README.md
@@ -0,0 +1,50 @@
# Macaron AI Module

This module provides the foundation for interacting with Large Language Models (LLMs) in a provider-agnostic way. It includes an abstract client definition, provider-specific client implementations, a client factory, and utility functions for processing responses.

## Module Components

- **clients/base.py**
Defines the abstract [`AIClient`](./clients/base.py) class. This class stores the system prompt and the LLM configuration parameters and serves as the base for all provider-specific AI client implementations.

- **clients/openai_client.py**
Implements the [`OpenAiClient`](./clients/openai_client.py) class, a concrete subclass of [`AIClient`](./clients/base.py). This client interacts with OpenAI-like APIs by sending HTTP requests and processing the responses, parsing structured JSON out of the model output where needed.

- **clients/ai_factory.py**
Contains the [`AIClientFactory`](./clients/ai_factory.py) class, which reads the provider configuration from the defaults and creates the correct AI client instance.

- **ai_tools.py**
Offers utility functions such as `extract_json` to assist with parsing the JSON response returned by an LLM. If the raw response is not valid JSON, the helper falls back to extracting the first JSON object embedded in the surrounding text.

## Usage

1. **Configuration:**
The module reads the LLM configuration from the application defaults (using the `defaults` module). Make sure that the `llm` section in your configuration includes valid settings for `enabled`, `provider`, `api_key`, `api_endpoint`, and `model`.

2. **Creating a Client:**
Use the [`AIClientFactory`](./clients/ai_factory.py) to create an AI client instance. The factory checks the configured provider and returns a client (e.g., an instance of [`OpenAiClient`](./clients/openai_client.py)) that can be used to invoke the LLM.

Example:
```py
from macaron.ai.clients.ai_factory import AIClientFactory

factory = AIClientFactory()
client = factory.create_client(system_prompt="You are a helpful assistant.")
if client is not None:
    response = client.invoke("Hello, how can you assist me?")
    print(response)
```

3. **Response Processing:**
When a structured response is required, pass a JSON schema via the `response_format` argument of `invoke`. The [`ai_tools.py`](./ai_tools.py) module takes care of parsing the response, extracting a JSON object from the raw text if necessary.
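The fallback parsing strategy described above (try a full JSON parse, then fall back to the first `{...}` span in the text) can be sketched as follows. This is a self-contained illustration only; the real helper is `extract_json` in [`ai_tools.py`](./ai_tools.py), and `extract_json_sketch` here is a hypothetical stand-in.

```python
# A minimal, self-contained sketch of the fallback JSON parsing strategy.
import json
import re
from typing import Any


def extract_json_sketch(response_text: str) -> Any:
    """Parse LLM output; fall back to the first {...} span in the text."""
    try:
        return json.loads(response_text)
    except json.JSONDecodeError:
        match = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not match:
            return None
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            return None


print(extract_json_sketch('Sure! Here is the result: {"score": 85}'))
```

The greedy `\{.*\}` pattern with `re.DOTALL` grabs the widest brace-delimited span, which handles the common case of a model wrapping a single JSON object in conversational text.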

## Logging and Error Handling

- The module uses Python's logging framework to report important events, such as token usage and warnings when prompts exceed the allowed context window.
- Configuration errors (e.g., a missing API key or endpoint) are reported by raising descriptive exceptions, such as [`ConfigurationError`](../errors.py).

## Extensibility

The design of the AI module is provider-agnostic. To add support for additional LLM providers:
- Implement a new client by subclassing [`AIClient`](./clients/base.py).
- Add the new client to the [`PROVIDER_MAPPING`](./clients/ai_factory.py).
- Update the configuration defaults accordingly.
2 changes: 2 additions & 0 deletions src/macaron/ai/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
43 changes: 43 additions & 0 deletions src/macaron/ai/ai_tools.py
@@ -0,0 +1,43 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module provides utility functions for Large Language Model (LLM)."""
import json
import logging
import re
from typing import Any

logger: logging.Logger = logging.getLogger(__name__)


def extract_json(response_text: str) -> Any:
    """
    Parse the response from the LLM.

    If raw JSON parsing fails, attempt to extract a JSON object from the text.

    Parameters
    ----------
    response_text: str
        The response text from the LLM.

    Returns
    -------
    Any
        The parsed JSON object, or None if no valid JSON could be extracted.
    """
    try:
        data = json.loads(response_text)
    except json.JSONDecodeError:
        logger.debug("Full JSON parse failed; trying to extract JSON from text.")
        # If the response is not valid JSON, try to extract a JSON object from the text.
        match = re.search(r"\{.*\}", response_text, re.DOTALL)
        if not match:
            return None
        try:
            data = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            logger.debug("Failed to parse extracted JSON: %s", e)
            return None

    return data
9 changes: 9 additions & 0 deletions src/macaron/ai/clients/__init__.py
@@ -0,0 +1,9 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module provides a mapping of AI client providers to their respective client classes."""

from macaron.ai.clients.base import AIClient
from macaron.ai.clients.openai_client import OpenAiClient

PROVIDER_MAPPING: dict[str, type[AIClient]] = {"openai": OpenAiClient}
62 changes: 62 additions & 0 deletions src/macaron/ai/clients/ai_factory.py
@@ -0,0 +1,62 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module defines the AIClientFactory class for creating AI clients based on provider configuration."""

import logging

from macaron.ai.clients import PROVIDER_MAPPING
from macaron.ai.clients.base import AIClient
from macaron.config.defaults import defaults
from macaron.errors import ConfigurationError

logger: logging.Logger = logging.getLogger(__name__)


class AIClientFactory:
    """Factory to create AI clients based on provider configuration."""

    def __init__(self) -> None:
        """
        Initialize the factory.

        The LLM configuration is read from defaults.
        """
        self.params = self._load_defaults()

    def _load_defaults(self) -> dict:
        section_name = "llm"
        default_values = {
            "enabled": False,
            "provider": "",
            "api_key": "",
            "api_endpoint": "",
            "model": "",
        }

        if defaults.has_section(section_name):
            section = defaults[section_name]
            default_values["enabled"] = section.getboolean("enabled", default_values["enabled"])
            for key, default_value in default_values.items():
                if isinstance(default_value, str):
                    # Strip whitespace but preserve case: API keys and model names
                    # can be case-sensitive.
                    default_values[key] = str(section.get(key, default_value)).strip()
            # Only the provider name is normalized to lowercase so it matches
            # the keys of PROVIDER_MAPPING.
            default_values["provider"] = str(default_values["provider"]).lower()

        if default_values["enabled"]:
            for key, value in default_values.items():
                if not value:
                    raise ConfigurationError(
                        f"AI client configuration '{key}' is required but not set in the defaults."
                    )

        return default_values

    def create_client(self, system_prompt: str) -> AIClient | None:
        """Create an AI client based on the configured provider."""
        if not self.params or not self.params["enabled"]:
            return None

        client_class = PROVIDER_MAPPING.get(self.params["provider"])
        if client_class is None:
            logger.error("Provider '%s' is not supported.", self.params["provider"])
            return None
        return client_class(system_prompt, self.params)
45 changes: 45 additions & 0 deletions src/macaron/ai/clients/base.py
@@ -0,0 +1,45 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module defines the abstract AIClient class for implementing AI clients."""

from abc import ABC, abstractmethod


class AIClient(ABC):
    """This abstract class is used to implement AI clients."""

    def __init__(self, system_prompt: str, params: dict) -> None:
        """
        Initialize the AI client.

        Parameters
        ----------
        system_prompt: str
            The system prompt to send with every request.
        params: dict
            The LLM configuration read from defaults.
        """
        self.system_prompt = system_prompt
        self.params = params

    @abstractmethod
    def invoke(
        self,
        user_prompt: str,
        temperature: float = 0.2,
        response_format: dict | None = None,
    ) -> dict:
        """
        Invoke the LLM and optionally validate its response.

        Parameters
        ----------
        user_prompt: str
            The user prompt to send to the LLM.
        temperature: float
            The temperature for the LLM response.
        response_format: dict | None
            The JSON schema to validate the response against.

        Returns
        -------
        dict
            The parsed response if `response_format` is provided,
            or the raw response if not.
        """
91 changes: 91 additions & 0 deletions src/macaron/ai/clients/openai_client.py
@@ -0,0 +1,91 @@
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module provides a client for interacting with a Large Language Model (LLM) that is Openai like."""

import logging
from typing import Any, TypeVar

from pydantic import BaseModel

from macaron.ai.ai_tools import extract_json
from macaron.ai.clients.base import AIClient
from macaron.errors import ConfigurationError, HeuristicAnalyzerValueError
from macaron.util import send_post_http_raw

logger: logging.Logger = logging.getLogger(__name__)

T = TypeVar("T", bound=BaseModel)
Reviewer comment (Member):

Is the `T` type used anywhere else? I cannot seem to find any usage; what is the use case for it?



class OpenAiClient(AIClient):
    """A client for interacting with a Large Language Model that is OpenAI API like."""

    def invoke(
        self,
        user_prompt: str,
        temperature: float = 0.2,
        response_format: dict | None = None,
        max_tokens: int = 4000,
        seed: int = 42,
        timeout: int = 30,
    ) -> Any:
        """
        Invoke the LLM and optionally validate its response.

        Parameters
        ----------
        user_prompt: str
            The user prompt to send to the LLM.
        temperature: float
            The temperature for the LLM response.
        response_format: dict | None
            The JSON schema for the response. If provided, it is forwarded to the
            API so that the response can be parsed and validated.
        max_tokens: int
            The maximum number of tokens for the LLM response.
        seed: int
            The seed for deterministic sampling, if supported by the provider.
        timeout: int
            The timeout for the HTTP request in seconds.

        Returns
        -------
        Any
            The JSON object parsed from the LLM response, or None if no valid
            JSON could be extracted.

        Raises
        ------
        ConfigurationError
            If the AI client is not enabled.
        HeuristicAnalyzerValueError
            If there is an error in getting or parsing the response.
        """
        if not self.params["enabled"]:
            raise ConfigurationError("AI client is not enabled. Please check your configuration.")

        headers = {"Content-Type": "application/json", "Authorization": f"Bearer {self.params['api_key']}"}
        payload = {
            "model": self.params["model"],
            "messages": [{"role": "system", "content": self.system_prompt}, {"role": "user", "content": user_prompt}],
            "response_format": response_format,
            "temperature": temperature,
            "seed": seed,
            "max_tokens": max_tokens,
        }

        try:
            response = send_post_http_raw(
                url=self.params["api_endpoint"], json_data=payload, headers=headers, timeout=timeout
            )
            if not response:
                raise HeuristicAnalyzerValueError("No response received from the LLM.")
            response_json = response.json()
            usage = response_json.get("usage", {})

            if usage:
                usage_str = ", ".join(f"{key} = {value}" for key, value in usage.items())
                logger.info("LLM call token usage: %s", usage_str)

            message_content = response_json["choices"][0]["message"]["content"]
            return extract_json(message_content)

        except Exception as e:
            logger.error("Error during LLM invocation: %s", e)
            raise HeuristicAnalyzerValueError(f"Failed to get or validate LLM response: {e}") from e
2 changes: 2 additions & 0 deletions src/macaron/ai/prompts/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
2 changes: 2 additions & 0 deletions src/macaron/ai/schemas/__init__.py
@@ -0,0 +1,2 @@
# Copyright (c) 2025 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
18 changes: 18 additions & 0 deletions src/macaron/config/defaults.ini
@@ -609,6 +609,9 @@ popular_packages_path =
# A boolean value that determines whether to check the deliverability of the email address.
check_deliverability = True

# The threshold for a package's description score to be considered secure.
score_threshold = 70

# ==== The following sections are for source code analysis using Semgrep ====
# rulesets: a reference to a 'ruleset' in this section refers to a Semgrep .yaml file containing one or more rules.
# rules: a reference to a 'rule' in this section refers to an individual rule ID, specified by the '- id:' field in
@@ -632,3 +635,18 @@ custom_semgrep_rules_path =
# .yaml prefix. Note, this will be ignored if a path to custom semgrep rules is not provided. This list may not contain
# duplicated elements, meaning that ruleset names must be unique.
disabled_custom_rulesets =

[llm]
# The LLM configuration for Macaron.
# If enabled, the LLM will be used to analyze the results and provide insights.
enabled = False
# The provider for the LLM service.
# Supported providers:
# - openai: OpenAI's GPT models.
provider =
# The API key for the LLM service.
api_key =
# The API endpoint for the LLM service.
api_endpoint =
# The model to use for the LLM service.
model =
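For illustration, an enabled configuration might look like the following. All values are placeholders; substitute your own provider details, and note that the endpoint and model name shown here are assumptions, not defaults shipped with Macaron.

```ini
[llm]
enabled = True
provider = openai
api_key = <your-api-key>
api_endpoint = https://api.openai.com/v1/chat/completions
model = gpt-4o-mini
```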
6 changes: 6 additions & 0 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
@@ -49,6 +49,12 @@ class Heuristics(str, Enum):
#: Indicates that the package has a similar structure to other packages maintained by the same user.
SIMILAR_PROJECTS = "similar_projects"

#: Indicates that the package contains some code that doesn't match the docstrings.
MATCHING_DOCSTRINGS = "matching_docstrings"

#: Indicates that the package description is inconsistent.
INCONSISTENT_DESCRIPTION = "inconsistent_description"


class HeuristicResult(str, Enum):
"""Result type indicating the outcome of a heuristic."""