diff --git a/CHANGELOG.md b/CHANGELOG.md index 706e8dcb..41a38060 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,14 @@ +v1.11.7 (2026-03-12) +------------- +**Improvements** +- Added `CodeFile` class to pzmm module for creating and uploading Python code files to SAS Intelligent Decisioning + - New method `write_id_code_file()` uploads a Python code file to a specified Viya folder and registers it with the Decisions service + - Accepts code as a raw string, file path, or `Path` object + - Validates code format via the SAS Viya API before upload; can be disabled with `validate_code=False` + - Raises `ValueError` if the file already exists in the target folder, if the folder is not found, or if validation fails + - Cleans up the uploaded file if Decisions service registration fails + - See `examples/pzmm_id_code_file_example.ipynb` for usage examples + v1.11.6 (2025-11-18) -------------------- **Improvements** diff --git a/examples/pzmm_id_code_file_example.ipynb b/examples/pzmm_id_code_file_example.ipynb new file mode 100644 index 00000000..72891021 --- /dev/null +++ b/examples/pzmm_id_code_file_example.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "567032e0", + "metadata": {}, + "source": [ + "# Creating Python Code Files for SAS Intelligent Decisioning\n", + "\n", + "This notebook demonstrates how to use the `CodeFile` class to upload Python code\n", + "files that are properly formatted for use with SAS Intelligent Decisioning.\n", + "\n", + "## Overview\n", + "\n", + "SAS Intelligent Decisioning requires Python code files to follow a specific format.\n", + "\n", + "Here is a high-level summary of the formatting requirements:\n", + "\n", + "- An `execute` function is required\n", + "- An `Output:` docstring listing output variables as the first line in the execute function\n", + "- A `DependentPackages:` docstring listing required packages at the top of the file including packages that are needed but are not built-in\n", + 
"- Must return standard Python data types\n", + "\n", + "\n", + "The `CodeFile` class validates and uploads properly formatted Python code to SAS Viya.\n", + "\n", + "For more information about formatting requirements for Python code files, see the [Rules\n", + "For Developing Python Code\n", + "Files](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm#n0jrohir6wzvd0n11omfautducm3)\n", + "in _SAS Intelligent Decisioning: User's Guide_.\n", + "\n", + "## Prerequisites\n", + "\n", + "- SAS Viya environment with SAS Intelligent Decisioning\n", + "- Appropriate permissions to create files in the target folder\n", + "- sasctl package installed\n", + "- Python code already formatted according to SAS Intelligent Decisioning requirements" + ] + }, + { + "cell_type": "markdown", + "id": "9da5894f", + "metadata": {}, + "source": [ + "## Setup: Connect to SAS Viya" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e27dcadc", + "metadata": {}, + "outputs": [], + "source": [ + "from sasctl import Session\n", + "from sasctl.pzmm import CodeFile\n", + "from sasctl.services import folders as folder_service\n", + "\n", + "\n", + "# Replace with your SAS Viya connection information\n", + "HOST = 'your-viya-host.com'\n", + "USERNAME = 'your-username'\n", + "PASSWORD = 'your-password'\n", + "\n", + "# Create a session\n", + "sess = Session(HOST, USERNAME, PASSWORD, verify_ssl=False)\n", + "print(f\"Connected to {HOST}\")\n", + "\n", + "try:\n", + " folder_service.create_folder('ID_python_files', \"/Public\")\n", + "except Exception as error:\n", + " print(f\"Folder already exists. {error}\")" + ] + }, + { + "cell_type": "markdown", + "id": "1e0f64d1", + "metadata": {}, + "source": [ + "## Example 1: Simple Code File\n", + "\n", + "Here is a simple example that performs a basic calculation."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa33286d", + "metadata": {}, + "outputs": [], + "source": [ + "# Define properly formatted Python code\n", + "simple_code = \"\"\"\n", + "def execute(input_value):\n", + " '''Output: score, category'''\n", + " # Calculate a simple score\n", + " score = input_value * 2 + 10\n", + " category = 'High' if score > 50 else 'Low'\n", + " return score, category\n", + "\"\"\"\n", + "\n", + "# Upload the code file to SAS Viya\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=simple_code,\n", + " file_name='simple_calculator.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"The file was uploaded successfully.\")\n", + "print(f\"File ID: {file_obj.id}\")\n", + "print(f\"File Name: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "4073e537", + "metadata": {}, + "source": [ + "## Example 2: Code File with API Call\n", + "\n", + "Here is an example of how to create a code file that makes an API call to retrieve data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6608730a", + "metadata": {}, + "outputs": [], + "source": [ + "api_code = \"\"\"\n", + "'''DependentPackages: requests'''\n", + "def execute(customer_id):\n", + " '''Output: risk_score, status'''\n", + " import requests\n", + " import json\n", + "\n", + " # Make an API call\n", + " url = f\"https://api.example.com/data?id={customer_id}\"\n", + " response = requests.get(url)\n", + "\n", + " if response.status_code == 200:\n", + " data = response.json()\n", + " risk_score = data.get('risk_score', 0)\n", + " status = 'Success'\n", + " else:\n", + " risk_score = -1\n", + " status = 'Failed'\n", + " \n", + " return risk_score, status\n", + "\"\"\"\n", + "\n", + "# Upload the code file\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=api_code,\n", + " file_name='risk_score_api.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"The file was uploaded successfully.\")\n", + "print(f\"File ID: {file_obj.id}\")\n", + "print(f\"File name: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "d3658f6f", + "metadata": {}, + "source": [ + "## Example 3: Code with Multiple Dependencies\n", + "\n", + "Here is an example of specifying multiple packages in the `DependentPackages` docstring." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f441ff", + "metadata": {}, + "outputs": [], + "source": [ + "data_processing_code = \"\"\"\n", + "'''DependentPackages: pandas, numpy'''\n", + "def execute(value1, value2, value3, threshold):\n", + " '''Output: mean_value, std_value, result'''\n", + " import pandas as pd\n", + " import numpy as np\n", + "\n", + " # Create a simple dataframe\n", + " data = pd.DataFrame({\n", + " 'values': [value1, value2, value3]\n", + " })\n", + "\n", + " # Calculate statistics\n", + " mean_value = float(np.mean(data['values']))\n", + " std_value = float(np.std(data['values']))\n", + " result = 'Pass' if mean_value > threshold else 'Fail'\n", + "\n", + " return mean_value, std_value, result\n", + "\"\"\"\n", + "\n", + "# Upload the code file\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=data_processing_code,\n", + " file_name='data_processor.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "print(f\"This file was uploaded successfully: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "76aa2f42", + "metadata": {}, + "source": [ + "## Example 4: Reading Code from a File\n", + "\n", + "Here is an example of reading Python code from an existing file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb8ad79d", + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "# Create a properly formatted Python file\n", + "temp_code_file = Path('temp_code.py')\n", + "temp_code_file.write_text(\"\"\"\n", + "def execute(income, assets, debt):\n", + " '''Output: credit_score, decision, confidence'''\n", + " # Business logic for credit decision\n", + " credit_score = income * 0.3 + assets * 0.2 - debt * 0.5\n", + " decision = 'Approved' if credit_score > 650 else 'Denied'\n", + " confidence = min(credit_score / 850, 1.0)\n", + " \n", + " return credit_score, decision, confidence\n", + "\"\"\")\n", + "\n", + "# Upload code from file (pass Path object)\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=temp_code_file,\n", + " file_name='credit_decision.py',\n", + " folder='/Public/ID_python_files'\n", + ")\n", + "\n", + "# Clean up\n", + "temp_code_file.unlink()\n", + "\n", + "print(f\"Code uploaded from file: {file_obj.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a0223909", + "metadata": {}, + "source": [ + "## Example 5: Code File with No Parameters\n", + "\n", + "Here is an example of creating code files that do not require input parameters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "460f264f", + "metadata": {}, + "outputs": [], + "source": [ + "from sasctl.services import files as file_service\n", + "from sasctl.services import folders as folder_service\n", + "\n", + "config_code = \"\"\"\n", + "def execute():\n", + " '''Output: current_date, environment, version'''\n", + " import datetime\n", + "\n", + " # Get current configuration\n", + " current_date = datetime.datetime.now().strftime('%Y-%m-%d')\n", + " environment = 'production'\n", + " version = '1.0.0'\n", + "\n", + " return current_date, environment, version\n", + "\"\"\"\n", + "\n", + "# Check if file already exists and delete it.\n", + "# Warning: Deleting files might result in loss of important data or configurations.\n", + "# Ensure you have backups or that the file can be safely removed before proceeding.\n", + "\n", + "file_name = 'config_info.py'\n", + "folder_path = '/Public/ID_python_files'\n", + "\n", + "try:\n", + " folder_obj = folder_service.get_folder(folder_path)\n", + "\n", + " file_filter = f\"and(eq(name, '{file_name}'), eq(contentType, 'file'))\"\n", + " existing_file = folder_service.get(\n", + " f\"/folders/{folder_obj.id}/members\",\n", + " params={\"filter\": file_filter}\n", + " )\n", + " if len(existing_file) > 0:\n", + " print(f\"Warning: You are about to delete this file: {file_name}\")\n", + " print(\"This action might result in loss of sensitive data or configurations.\")\n", + "\n", + " file_service.delete_file({\"id\": existing_file['uri'].split('/')[-1]})\n", + " print(f\"Deleted file: {file_name}\")\n", + "except Exception as e:\n", + " print(f\"This file was not found: {file_name} {e}\")\n", + "\n", + "\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=config_code,\n", + " file_name=file_name,\n", + " folder=folder_path\n", + ")\n", + "\n", + "print(f\"Configuration code file created: {file_name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "510f7855", + 
"metadata": {}, + "source": [ + "## Example 6: Disable Validation\n", + "\n", + "Here is an example of skipping pre-upload validation.\n", + "\n", + "**Note:** The file will still be uploaded even if it contains formatting errors.\n", + "The errors appear later when you try to use the file in a decision. You can\n", + "view the code file in SAS Intelligent Decisioning and validate it to check for errors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95855524", + "metadata": {}, + "outputs": [], + "source": [ + "fast_code = \"\"\"\n", + "def execute(input_a, input_b):\n", + " '''Output: result'''\n", + " result = input_a + input_b\n", + " return result\n", + "\"\"\"\n", + "\n", + "# Skip pre-upload validation for faster upload\n", + "# File is still created when there are formatting errors\n", + "file_obj = CodeFile.write_id_code_file(\n", + " code=fast_code,\n", + " file_name='fast_calculator.py',\n", + " folder='/Public/ID_python_files',\n", + " validate_code=False # Skip pre-upload validation\n", + ")\n", + "\n", + "print(f\"File uploaded without pre-validation: {file_obj.name}\")\n", + "print(\"Warning: If there are formatting errors, they will appear when you use the file in a decision.\")" + ] + }, + { + "cell_type": "markdown", + "id": "396bc5f0", + "metadata": {}, + "source": [ + "## Clean Up\n", + "\n", + "Close the SAS Viya session when finished." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a1f6b08", + "metadata": {}, + "outputs": [], + "source": [ + "# Close the session\n", + "sess.close()\n", + "print(\"Session closed\")" + ] + }, + { + "cell_type": "markdown", + "id": "12a60696", + "metadata": {}, + "source": [ + "## Additional Resources\n", + "\n", + "- [SAS Intelligent Decisioning Documentation](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm)\n", + "- [Rules For Developing Python Code Files](https://documentation.sas.com/?cdcId=edmcdc&cdcVersion=default&docsetId=edmug&docsetTarget=n04vfc1flrz8jsn1o5jblnbgx6i3.htm#n0jrohir6wzvd0n11omfautducm3)\n", + "- [python-sasctl Documentation](https://sassoftware.github.io/python-sasctl/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/sasctl/_services/files.py b/src/sasctl/_services/files.py index f439d9d5..16b0c3d7 100644 --- a/src/sasctl/_services/files.py +++ b/src/sasctl/_services/files.py @@ -20,8 +20,8 @@ class Files(Service): The file can be associated with the URI of another identifiable object (for example, a parentUri). Every file must have an assigned content type and name. Files can be retrieved individually by using the file's - identifier or as a list of files by using a parentUri. Each file has its - content stream associated with it. After creation, the metadata that is + identifier or as a list of files by using a parentUri. Each file is + associated with its content stream. 
After creation, the metadata that is associated with the file or the actual content can be updated. A single file can be deleted by using a specific ID. Multiple files can be deleted by specifying a parentUri. A file can be uploaded via raw request or @@ -61,7 +61,8 @@ def create_file(cls, file, folder=None, filename=None, expiration=None): with open(file, "rb") as f: file = f.read() - else: + + elif not isinstance(file, bytes): if filename is None: raise ValueError( "`filename` must be specified if `file` is not a path." diff --git a/src/sasctl/_services/model_repository.py b/src/sasctl/_services/model_repository.py index 74fb6446..a68415e6 100644 --- a/src/sasctl/_services/model_repository.py +++ b/src/sasctl/_services/model_repository.py @@ -18,7 +18,6 @@ from ..core import current_session, delete, get, sasctl_command, RestObj from .service import Service - FUNCTIONS = { "Analytical", "Classification", @@ -161,7 +160,7 @@ def get_model_contents(cls, model): contents = cls.request_link(link, "contents") # By default, request_link() will unwrap a length-1 list. - # If that happens, re-wrap so a list is always returned. + # If that happens, re-wrap so that a list is always returned. if isinstance(contents, list): return contents @@ -186,7 +185,7 @@ def get_repository(cls, repository, refresh=False): Notes ------- - If `repository` is a complete representation of the repository it will be + If `repository` is a complete representation of the repository, it will be returned unless `refresh` is set. This prevents unnecessary REST calls when data is already available on the client. @@ -203,7 +202,7 @@ def get_repository(cls, repository, refresh=False): if cls.is_uuid(repository): try: - # Attempt to GET the repository directly. Access may be restricted, so allow HTTP 403 errors + # Attempt to GET the repository directly. Access might be restricted, so allow HTTP 403 errors # and fall back to using list_repositories() instead. 
return cls.get("/repositories/{id}".format(id=repository)) except HTTPError as e: diff --git a/src/sasctl/pzmm/__init__.py b/src/sasctl/pzmm/__init__.py index 4667bc65..d3eb4de0 100644 --- a/src/sasctl/pzmm/__init__.py +++ b/src/sasctl/pzmm/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2021, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +from .code_file import CodeFile from .git_integration import GitIntegrate from .import_model import ImportModel from .mlflow_model import MLFlowModel diff --git a/src/sasctl/pzmm/code_file.py b/src/sasctl/pzmm/code_file.py new file mode 100644 index 00000000..45882e0b --- /dev/null +++ b/src/sasctl/pzmm/code_file.py @@ -0,0 +1,242 @@ +# Copyright (c) 2026, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Tools for creating and uploading Python code files for SAS Intelligent Decisioning. +""" + +# Standard Library Imports +from pathlib import Path +from typing import Union + +# Package Imports +from ..core import RestObj +from ..services import files as file_service +from ..services import folders as folder_service +from .._services.service import Service + + +class CodeFile(Service): + """ + A class for creating Python code files formatted for SAS Intelligent Decisioning. + + SAS Intelligent Decisioning requires Python code files to follow a specific format + with an execute function that includes docstrings for output variables and + dependent packages. + """ + + _SERVICE_ROOT = "/decisions" + + @classmethod + def _validate_code_format_via_api(cls, code: str) -> bool: + """ + Validate code format using the SAS Viya validation endpoint. + + This validates Output docstring position, return statements, execute function, + and other ID-specific formatting requirements. + + Parameters + ---------- + code : str + Python code to validate. + + Raises + ------ + ValueError + If the code does not meet ID formatting requirements. 
+ """ + try: + response = cls.post( + "/commons/validations/codeFiles", + json={"content": code, "type": "decisionPythonFile"}, + ) + + # If validation fails, the response will contain an error + if not response.get("valid", True): + error = response.get("error", {}) + if isinstance(error, dict): + error_message = error.get("message", str(error)) + else: + error_message = str(error) + raise ValueError(error_message) + + except Exception as e: + # Re-raise ValueError as-is, wrap other exceptions + if isinstance(e, ValueError): + raise + raise ValueError(f"Code validation failed: {str(e)}") + + @classmethod + def _find_file_in_folder( + cls, folder_id: str, file_name: str + ) -> Union[RestObj, None]: + """ + Find a file in a specific folder by name. + + Parameters + ---------- + folder_id : str + The ID of the folder to search in. + file_name : str + Name of the file to find. + + Returns + ------- + RestObj or None + File details if found, None otherwise. + """ + + # Search for the file in the folder + file_filter = f"and(eq(name, '{file_name}'), eq(contentType, 'file'))" + response = folder_service.get( + f"/folders/{folder_id}/members", params={"filter": file_filter} + ) + + if len(response) <= 0: + # No files with file_name were found. + return None + + file_uri = response.get("uri") + + if file_uri: + return response + + return None + + @classmethod + def _load_python_code(cls, code: Union[str, Path]) -> str: + """ + Load and prepare a Python code file for SAS Intelligent Decisioning. + + This method loads code from a string or file path and performs basic checks. + Actual validation against ID format requirements happens during upload. + + Parameters + ---------- + code : str or pathlib.Path + Python code as a string or path to a Python file. + + Returns + ------- + str + The Python code file content. + + Raises + ------ + ValueError + If code is empty or file is not found. 
+ """ + # Check for empty string first + if isinstance(code, str) and (not code or not code.strip()): + raise ValueError("Code cannot be empty") + + # Convert string path to Path object if needed (with error handling for invalid paths) + try: + if isinstance(code, str) and Path(code).exists(): + code = Path(code) + except OSError: + # Path is invalid (for example, too long or malformed) - treat as raw code string + pass + + if isinstance(code, Path): + if not code.exists(): + raise ValueError(f"Code file not found: {code}") + code = code.read_text() + + if not code or not code.strip(): + raise ValueError("Code cannot be empty") + + return code + + @classmethod + def write_id_code_file( + cls, + code: Union[str, Path], + file_name: str, + folder: Union[str, dict], + validate_code: bool = True, + ) -> RestObj: + """ + Validate and upload a Python code file to SAS Intelligent Decisioning. + + This method validates a properly formatted ID Python code file and uploads + it to a specified folder in SAS Viya and then registers it with the Decisions service. + + Parameters + ---------- + code : str or pathlib.Path + Python code as a string or path to a Python file. The code must already + be formatted for ID with an execute function and proper docstrings. + file_name : str + Name for the code file (for example, 'my_code.py'). Must end with .py + folder : str or dict + Target folder in SAS Viya. Can be a folder name, path (for example, + '/Public/MyFolder'), or folder object returned by folders.get_folder(). + validate_code: bool + If True, validates code format via API before upload. If False, skips validation. + + Returns + ------- + RestObj + Code file object returned by the Decisions service. + + Raises + ------ + ValueError + If file_name does not end with .py, if folder is not found, if code + does not contain required docstrings, or if code is invalid. + SyntaxError + If the provided code has syntax errors.
+ """ + # Validate file_name + if not file_name.endswith(".py"): + raise ValueError("file_name must end with .py extension") + + # Load the code (handles file paths, empty checks, etc.) + loaded_code = cls._load_python_code(code) + + # Validate code format if requested + if validate_code: + cls._validate_code_format_via_api(loaded_code) + + # Verify that the folder exists + folder_obj = folder_service.get_folder(folder) + if not folder_obj: + raise ValueError(f"Folder '{folder}' not found") + + # Verify that a file with that name does not exist + file_obj = cls._find_file_in_folder(folder_obj.id, file_name) + if file_obj: + raise ValueError(f"File '{file_name}' already exists in this folder.") + + # Upload the file to SAS Viya Files service + file_obj = file_service.create_file( + file=loaded_code.encode("utf-8"), + folder=folder, + filename=file_name, + ) + + data = { + "name": file_name, + "fileUri": f"/files/files/{file_obj.id}", + "type": "decisionPythonFile", + } + + try: + code_file = cls.post("/codeFiles", json=data) + except Exception as post_error: + # Try to clean up the uploaded file since code file creation failed + try: + # There is no response from deleting a file object + file_service.delete_file({"id": file_obj["id"]}) + + except Exception as delete_error: + raise RuntimeError( + f"There was an error creating the code file: {post_error}." 
+ f"Also, could not delete the orphaned file: {delete_error}" + ) + raise RuntimeError( + f"There was an error with creating the code file: {post_error}" + ) + + return code_file diff --git a/src/sasctl/pzmm/write_json_files.py b/src/sasctl/pzmm/write_json_files.py index 8eb98bf9..c5974c0d 100644 --- a/src/sasctl/pzmm/write_json_files.py +++ b/src/sasctl/pzmm/write_json_files.py @@ -14,7 +14,7 @@ from pathlib import Path from typing import Any, Generator, List, Optional, Type, Union -# Third Party Imports +# Third-Party Imports import pandas as pd from pandas import DataFrame, Series @@ -22,7 +22,7 @@ from sasctl.pzmm.write_score_code import ScoreCode as sc from ..core import current_session from ..utils.decorators import deprecated, experimental -from ..utils.misc import check_if_jupyter +from ..utils.misc import check_if_jupyter, IMPORT_TO_INSTALL_MAPPING try: # noinspection PyPackageRequirements @@ -45,7 +45,7 @@ class NpEncoder(json.JSONEncoder): pass -# TODO: add converter for any type of dataset (list, dataframe, numpy array) +# TODO: add converter for any type of data set (list, dataframe, numpy array) # Constants INPUT = "inputVar.json" @@ -300,14 +300,14 @@ def write_model_properties_json( Model Manager. If these values are detected, they will be supplied as custom user properties. - If a json_path is supplied, this function outputs a JSON file named + If a json_path is supplied, this function writes a JSON file named "ModelProperties.json". Otherwise, a dict is returned. Parameters ---------- model_name : str User-defined model name. This value is overwritten by SAS Model Manager - based on the name of the zip file used for importing the model. + based on the name of the ZIP file used for importing the model. target_variable : str Target variable to be predicted by the model. target_values : list, optional @@ -477,7 +477,7 @@ def write_file_metadata_json( """ Writes a file metadata JSON file pointing to all relevant files. 
- This function outputs a JSON file named "fileMetadata.json". + This function writes a JSON file named "fileMetadata.json". Parameters ---------- @@ -651,7 +651,7 @@ def add_tuple_to_fitstat( Raises ------ ValueError - If an parameter within the tuple list is not a tuple or has a length + If a parameter within the tuple list is not a tuple or has a length different from the expected three. """ @@ -696,7 +696,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: Returns ------- list of dict - List of dicts with the user provided values inputted. + List of dicts with the user provided values entered. """ while True: input_param_name = input("What is the parameter name?\n") @@ -706,7 +706,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: f"{input_param_name} is not a valid parameter.", category=UserWarning, ) - if input("Would you like to input more parameters? (Y/N)") == "N": + if input("Would you like to enter more parameters? (Y/N)") == "N": break continue param_value = input("What is the parameter's value?\n") @@ -723,7 +723,7 @@ def user_input_fitstat(cls, data: List[dict]) -> List[dict]: f"1, 2, or 3 or TRAIN, TEST, or VALIDATE respectively.", category=UserWarning, ) - if input("Would you like to input more parameters? (Y/N)") == "N": + if input("Would you like to enter more parameters? 
(Y/N)") == "N": break continue data[data_role - 1]["dataMap"][param_name] = param_value @@ -929,7 +929,7 @@ def assess_model_bias( maxdiff_dfs=maxdiff_dfs, datarole=datarole ) - # getting json files + # Getting JSON files json_files = cls.bias_dataframes_to_json( groupmetrics=group_metrics, maxdifference=max_differences, @@ -973,7 +973,7 @@ def format_max_differences( Returns ------- pandas.DataFrame - A singluar DataFrame containing all max differences data + A singular DataFrame containing all max differences data """ maxdiff_df = pd.concat(maxdiff_dfs) maxdiff_df = maxdiff_df.rename( @@ -1102,7 +1102,7 @@ def bias_dataframes_to_json( conventions (no spaces and the name cannot begin with a number or symbol). Required for regression problems. The default value is None. json_path : str or pathlib.Path, optional - Location for the output JSON files. If a path is passed, the json files will populate in the directory and + Location for the output JSON files. If a path is passed, the JSON files will populate in the directory and the function will return None, unless return_dataframes is True. Otherwise, the function will return the json strings in a dictionary (dict["maxDifferences.json"] and dict["groupMetrics.json"]). The default value is None. @@ -1200,19 +1200,19 @@ def calculate_model_statistics( cutoff: Optional[float] = None, ) -> Union[dict, None]: """ - Calculates fit statistics (including ROC and Lift curves) from datasets and then + Calculates fit statistics (including ROC and Lift curves) from data sets and then either writes them to JSON files or returns them as a single dictionary. Calculations are performed using a call to SAS CAS via the swat package. An error will be raised if the swat package is not installed or if a connection to a SAS Viya system is not possible. 
- Datasets must contain the actual and predicted values and may optionally contain + Data sets must contain the actual and predicted values and can optionally contain the predicted probabilities. If no probabilities are provided, a dummy - probability dataset is generated based on the predicted values and normalized by + probability data set is generated based on the predicted values and normalized by the target value. - Datasets can be provided in the following forms, with the assumption that data + Data sets can be provided in the following forms, with the assumption that data is ordered as `actual`, `predict`, and `probability` respectively: * pandas dataframe: the actual and predicted values are their own columns @@ -1220,7 +1220,7 @@ def calculate_model_statistics( * numpy array: the actual and predicted values are their own columns or rows \ and ordered such that the actual values come first and the predicted second - If a json_path is supplied, then this function outputs a set of JSON files named + If a json_path is supplied, then this function writes a set of JSON files named "dmcas_fitstat.json", "dmcas_roc.json", "dmcas_lift.json". Parameters @@ -1228,11 +1228,11 @@ def calculate_model_statistics( target_value : str, int, or float Target event value for model prediction events. validate_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the validation data. The default value is None. + Data set pertaining to the validation data. The default value is None. train_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the training data. The default value is None. + Data set pertaining to the training data. The default value is None. test_data : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the test data. The default value is None. + Data set pertaining to the test data. The default value is None. 
json_path : str or pathlib.Path, optional Location for the output JSON files. The default value is None. target_type: str, optional @@ -1242,7 +1242,7 @@ def calculate_model_statistics( Returns ------- dict - Dictionary containing a key-value pair representing the files name and json + Dictionary containing a key-value pair representing the files name and JSON dumps respectively. Raises @@ -1375,21 +1375,21 @@ def check_for_data( test: Union[DataFrame, List[list], Type["numpy.ndarray"]] = None, ) -> list: """ - Check which datasets were provided and return a list of flags. + Check which data sets were provided and return a list of flags. Parameters ---------- validate : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the validation data. The default value is None. + Data set pertaining to the validation data. The default value is None. train : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the training data. The default value is None. + Data set pertaining to the training data. The default value is None. test : pandas.DataFrame, list of list, or numpy.ndarray, optional - Dataset pertaining to the test data. The default value is None. + Data set pertaining to the test data. The default value is None. Returns ------- data_partitions : list - A list of flags indicating which partitions have datasets. + A list of flags indicating which partitions have data sets. Raises ------ @@ -1416,7 +1416,7 @@ def stat_dataset_to_dataframe( target_type: str = "classification", ) -> DataFrame: """ - Convert the user supplied statistical dataset from either a pandas DataFrame, + Convert the user supplied statistical data set from either a pandas DataFrame, list of lists, or numpy array to a DataFrame formatted for SAS CAS upload. 
If the prediction probabilities are not provided, the prediction data will be @@ -1428,7 +1428,7 @@ def stat_dataset_to_dataframe( Parameters ---------- data : pandas.DataFrame, list of list, or numpy.ndarray - Dataset representing the actual and predicted values of the model. May also + Data set representing the actual and predicted values of the model. May also include the prediction probabilities. target_value : str, int, or float, optional Target event value for model prediction events. Used for creating a binary @@ -1438,7 +1438,7 @@ def stat_dataset_to_dataframe( Returns ------- data : pandas.DataFrame - Dataset formatted for SAS CAS upload. + Data set formatted for SAS CAS upload. Raises ------ @@ -1493,12 +1493,12 @@ def apply_dataframe_to_json( ) -> dict: """ Map the values of the ROC or Lift charts from SAS CAS to the dictionary - representation of the respective json file. + representation of the respective JSON file. Parameters ---------- json_dict : dict - Dictionary representation of the ROC or Lift chart json file. + Dictionary representation of the ROC or Lift chart JSON file. partition : int Numerical representation of the data partition. Either 0, 1, or 2. stat_df : pandas.DataFrame @@ -1510,7 +1510,7 @@ def apply_dataframe_to_json( Returns ------- json_dict : dict - Dictionary representation of the ROC or Lift chart json file, with the + Dictionary representation of the ROC or Lift chart JSON file, with the values from the SAS CAS percentile action set added in. """ for row_num in range(len(stat_df)): @@ -1624,9 +1624,9 @@ def create_requirements_json( current working environment. Then the package and version are written to a requirements.json file. 
-    WARNING: The methods utilized in this function can determine package
+    WARNING: The methods used in this function can determine package
     dependencies from provided scripts and pickle files, but CANNOT determine the
-    required package versions without being in the development environment which
+    required package versions without being in the development environment, in which
     they were originally created.
 
     This function works best when run in the model development environment and is
@@ -1636,11 +1636,11 @@
     the requirements.json file's package versions to match the model development
     environment.
 
-    When provided with an output_path argument, this function outputs a JSON file
+    When provided with an output_path argument, this function writes a JSON file
     named "requirements.json". If create_requirements_txt is True, it will also
     create a requirements.txt file. Otherwise, a list of dicts is returned.
 
-    Note: requirements.txt file is only created when both output_path and
+    Note: The requirements.txt file is created only when both output_path and
     create_requirements_txt are specified.
 
     Parameters
@@ -1657,7 +1657,7 @@
     Returns
     -------
     list of dict
-        List of dictionary representations of the json file contents, split into
+        List of dictionary representations of the JSON file contents, split into
         each package and/or warning.
""" pickle_packages = [] @@ -1677,29 +1677,6 @@ def create_requirements_json( item[0] for item in package_and_version if not item[1] ] - IMPORT_TO_INSTALL_MAPPING = { - # Data Science & ML Core - "sklearn": "scikit-learn", - "skimage": "scikit-image", - "cv2": "opencv-python", - "PIL": "Pillow", - # Data Formats & Parsing - "yaml": "PyYAML", - "bs4": "beautifulsoup4", - "docx": "python-docx", - "pptx": "python-pptx", - # Date & Time Utilities - "dateutil": "python-dateutil", - # Database Connectors - "MySQLdb": "MySQL-python", - "psycopg2": "psycopg2-binary", - # System & Platform - "win32api": "pywin32", - "win32com": "pywin32", - # Scientific Libraries - "Bio": "biopython", - } - # Map import names to their corresponding package installation names package_and_version = [ (IMPORT_TO_INSTALL_MAPPING.get(name, name), version) @@ -1812,7 +1789,7 @@ def get_code_dependencies( Get the package dependencies for all Python scripts in the provided directory path. - Note that currently this functionality only works for .py files. + Note that currently this functionality works only for .py files. 
Parameters ---------- @@ -1957,7 +1934,7 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: # Convert to a pandas dataframe for ease of conditional filtering df_pickle = pd.DataFrame({"opcode": opcode, "arg": arg, "pos": pos}) - # For all opcodes labelled GLOBAL or STACK_GLOBAL pull out the package names + # For all opcodes labeled GLOBAL or STACK_GLOBAL pull out the package names global_stack = df_pickle[ (df_pickle.opcode == "GLOBAL") | (df_pickle.opcode == "STACK_GLOBAL") ] @@ -1967,12 +1944,12 @@ def get_package_names(stream: Union[bytes, str]) -> List[str]: global_stack.arg.str.split().str[0].str.split(".").str[0].unique().tolist() ) - # For all opcodes labelled BINUNICODE or SHORT_BINUNICODE grab the package names + # For all opcodes labeled BINUNICODE or SHORT_BINUNICODE grab the package names binunicode = df_pickle[ (df_pickle.opcode == "BINUNICODE") | (df_pickle.opcode == "SHORT_BINUNICODE") ] - # From the argument column, split the string by `.`, then return only unique + # From the argument column, split the string by `.`, and then return only unique # cells with at least one split arg_binunicode = binunicode.arg.str.split(".") unicode_packages = ( @@ -2419,7 +2396,7 @@ def generate_model_card( target_value=target_value, ) - # Formats all new ModelProperties information into one dictionary that can be used to update the json file + # Formats all new ModelProperties information into one dictionary that can be used to update the JSON file update_dict["trainTable"] = training_table update_dict["selectionStatistic"] = selection_statistic update_dict["algorithm"] = algorithm @@ -2520,7 +2497,7 @@ def generate_outcome_average( Returns ------- dict - Returns a dictionary with a key value pair that represents the outcome average. + Returns a dictionary with a key-value pair that represents the outcome average. 
""" import numbers @@ -2563,7 +2540,7 @@ def get_selection_statistic_value( Returns ------- float - Returns the numerical value assoicated with the chosen selection statistic. + Returns the numerical value associated with the chosen selection statistic. """ if isinstance(model_files, dict): if FITSTAT not in model_files: @@ -2578,7 +2555,7 @@ def get_selection_statistic_value( or fitstat["dataMap"][selection_statistic] == None ): raise RuntimeError( - "The chosen selection statistic was not generated properly. Please ensure the value has been " + "The chosen selection statistic was not generated properly. Please ensure that the value has been " "properly created then try again." ) return fitstat["dataMap"][selection_statistic] @@ -2597,7 +2574,7 @@ def get_selection_statistic_value( or fitstat["dataMap"][selection_statistic] == None ): raise RuntimeError( - "The chosen selection statistic was not generated properly. Please ensure the value has been " + "The chosen selection statistic was not generated properly. Please ensure that the value has been " "properly created then try again." 
             )
         return fitstat["dataMap"][selection_statistic]
diff --git a/src/sasctl/utils/misc.py b/src/sasctl/utils/misc.py
index b2a33658..5c8536a5 100644
--- a/src/sasctl/utils/misc.py
+++ b/src/sasctl/utils/misc.py
@@ -6,9 +6,34 @@
 import random
 import string
+import warnings
 
 from .decorators import versionadded
 
+# Mapping of Python import names to their PyPI installation names
+IMPORT_TO_INSTALL_MAPPING = {
+    # Data Science & ML Core
+    "sklearn": "scikit-learn",
+    "skimage": "scikit-image",
+    "cv2": "opencv-python",
+    "PIL": "Pillow",
+    # Data Formats & Parsing
+    "yaml": "PyYAML",
+    "bs4": "beautifulsoup4",
+    "docx": "python-docx",
+    "pptx": "python-pptx",
+    # Date & Time Utilities
+    "dateutil": "python-dateutil",
+    # Database Connectors
+    "MySQLdb": "MySQL-python",
+    "psycopg2": "psycopg2-binary",
+    # System & Platform
+    "win32api": "pywin32",
+    "win32com": "pywin32",
+    # Scientific Libraries
+    "Bio": "biopython",
+}
+
 
 def installed_packages():
     """List Python packages installed in the current environment.
@@ -18,10 +43,10 @@
 
     Notes
     -----
-    Uses pip freeze functionality so pip module must be present. For pip
+    Uses pip freeze functionality, so pip module must be present. For pip
     versions >=20.1, this functionality fails to provide versions for some
-    conda installed, locally installed, and url installed packages. Instead
-    uses the pkg_resources package which is typically bundled with pip.
+    conda installed, locally installed, and url installed packages. Instead,
+    uses importlib.metadata, which is part of the Python standard library.
""" from packaging import version @@ -30,14 +55,14 @@ def installed_packages(): import pip if version.parse(pip.__version__) >= version.parse("20.1"): - import pkg_resources + from importlib.metadata import distributions - return [ - p.project_name + "==" + p.version for p in pkg_resources.working_set - ] + output = [p.name + "==" + p.version for p in distributions()] + return output else: from pip._internal.operations import freeze except ImportError: + try: from pip.operations import freeze except ImportError: @@ -49,7 +74,7 @@ def installed_packages(): @versionadded(version="1.5.1") def random_string(length): - """Generates a random alpha-numeric string of a given length. + """Generates a random alphanumeric string of a given length. Parameters ---------- @@ -62,7 +87,7 @@ def random_string(length): """ - # random.choices() wasn't added until Python 3.6, so repeatedly call .choice() instead + # random.choices() was not added until Python 3.6, so repeatedly call .choice() instead chars = string.ascii_letters + string.digits return "".join(random.choice(chars) for _ in range(length)) @@ -70,7 +95,7 @@ def random_string(length): @versionadded(version="1.9.0") def check_if_jupyter() -> bool: """ - Check if the code is being executed from a jupyter notebook. + Check if the code is being executed from a Jupyter notebook. Source: https://stackoverflow.com/questions/47211324/check-if-module-is-running-in- jupyter-or-not @@ -78,7 +103,7 @@ def check_if_jupyter() -> bool: Returns ------- bool - True if a jupyter notebook is detected. False otherwise. + True if a Jupyter notebook is detected. False otherwise. 
""" try: shell = get_ipython().__class__.__name__ diff --git a/tests/integration/test_pymas.py b/tests/integration/test_pymas.py index 7973211b..aef90971 100644 --- a/tests/integration/test_pymas.py +++ b/tests/integration/test_pymas.py @@ -10,7 +10,6 @@ import pytest - pytest.skip( "PyMAS functionality is deprecated and will be removed in a future release.", allow_module_level=True, @@ -241,9 +240,7 @@ def test_from_pickle(train_data, pickle_file): end; endpackage; -""".lstrip( - "\n" - ) +""".lstrip("\n") assert isinstance(p, PyMAS) @@ -343,9 +340,7 @@ def hello_world(): end; endpackage; -""".lstrip( - "\n" - ) +""".lstrip("\n") f = tmpdir.join("model.py") f.write(code) diff --git a/tests/unit/test_code_file.py b/tests/unit/test_code_file.py new file mode 100644 index 00000000..112fc7dc --- /dev/null +++ b/tests/unit/test_code_file.py @@ -0,0 +1,517 @@ +#!/usr/bin/env python +# encoding: utf-8 +# +# Copyright © 2026, SAS Institute Inc., Cary, NC, USA. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +from unittest import mock +import pytest +import tempfile +from pathlib import Path + +from sasctl.pzmm import CodeFile + + +class TestValidateCodeFormatViaAPI: + """Tests for _validate_code_format_via_api method.""" + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_success(self, mock_post): + """Test successful code validation via API.""" + mock_post.return_value = {"valid": True} + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + # Should not raise any exception + CodeFile._validate_code_format_via_api(code) + + mock_post.assert_called_once_with( + "/commons/validations/codeFiles", + json={"content": code, "type": "decisionPythonFile"}, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_with_error_message(self, mock_post): + """Test validation failure with error message.""" + mock_post.return_value = { + "valid": 
False, + "error": { + "message": "Output docstring must be the first line in execute function" + }, + } + + code = """ +def execute(): + result = 'test' + 'Output:result' + return result +""" + with pytest.raises(ValueError, match="Output docstring must be the first line"): + CodeFile._validate_code_format_via_api(code) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_with_error_no_message(self, mock_post): + """Test validation failure with error but no message.""" + mock_post.return_value = {"valid": False, "error": "Validation failed"} + + code = "invalid code" + + with pytest.raises(ValueError, match="Validation failed"): + CodeFile._validate_code_format_via_api(code) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + def test_validate_code_format_api_exception(self, mock_post): + """Test handling of API exceptions during validation.""" + mock_post.side_effect = RuntimeError("API connection failed") + + code = "def execute():\n return 1" + + with pytest.raises( + ValueError, match="Code validation failed: API connection failed" + ): + CodeFile._validate_code_format_via_api(code) + + +class TestFindFileInFolder: + """Tests for _find_file_in_folder method.""" + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_found(self, mock_get): + """Test finding an existing file in a folder.""" + mock_get.return_value = { + "uri": "files/files/acde070d-8c4c-4f0d-9d8a-162843c10333" + } + + result = CodeFile._find_file_in_folder("folder-456", "test.py") + + assert result is not None + assert result == mock_get.return_value + mock_get.assert_called_once_with( + "/folders/folder-456/members", + params={"filter": "and(eq(name, 'test.py'), eq(contentType, 'file'))"}, + ) + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_not_found(self, mock_get): + """Test when file is not found in folder.""" + mock_response = mock.MagicMock() + mock_response.__len__ = mock.MagicMock(return_value=0) + 
mock_get.return_value = mock_response + + result = CodeFile._find_file_in_folder("folder-456", "nonexistent.py") + + assert result is None + + @mock.patch("sasctl.services.folders.get") + def test_find_file_in_folder_no_uri(self, mock_get): + """Test when response has no URI.""" + mock_get.return_value = {"id": "unique-id"} + + result = CodeFile._find_file_in_folder("folder-456", "test.py") + + assert result is None + + +class TestLoadPythonCode: + """Tests for _load_python_code method.""" + + def test_load_python_code_from_string(self): + """Test loading code from a string.""" + code = "def execute():\n return 'test'" + result = CodeFile._load_python_code(code) + assert result == code + + def test_load_python_code_from_file(self): + """Test loading code from a file path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def execute():\n return 'test'") + temp_path = Path(f.name) + + try: + result = CodeFile._load_python_code(temp_path) + assert result == "def execute():\n return 'test'" + finally: + temp_path.unlink() + + def test_load_python_code_from_string_path(self): + """Test loading code from a string path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def test():\n pass") + temp_path = f.name + + try: + result = CodeFile._load_python_code(temp_path) + assert result == "def test():\n pass" + finally: + Path(temp_path).unlink() + + def test_load_python_code_empty_string(self): + """Test that empty string raises ValueError.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code("") + + def test_load_python_code_whitespace_only(self): + """Test that whitespace-only string raises ValueError.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(" \n\t ") + + def test_load_python_code_file_not_found(self): + """Test that non-existent file raises ValueError.""" + with pytest.raises(ValueError, 
match="Code file not found"): + CodeFile._load_python_code(Path("/nonexistent/path/to/file.py")) + + def test_load_python_code_empty_file(self): + """Test that empty file raises ValueError.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + temp_path = Path(f.name) + + try: + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(temp_path) + finally: + temp_path.unlink() + + def test_load_python_code_whitespace_only_file(self): + """Test that file with only whitespace raises ValueError.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write(" \n\n\t ") + temp_path = Path(f.name) + + try: + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile._load_python_code(temp_path) + finally: + temp_path.unlink() + + def test_load_python_code_invalid_path_string(self): + """Test that invalid path string is treated as raw code.""" + # A string that looks like it could be a path but is actually invalid + code = "/some/path/that/does/not/exist.py but is actually code" + result = CodeFile._load_python_code(code) + assert result == code + + +class TestWriteIDCodeFile: + """Tests for write_id_code_file method.""" + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_success( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test successful upload of a code file to Viya.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.name = "test_code.py" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + 
mock_code_file.name = "test_code.py" + mock_code_file.id = "cf-12345" + mock_post.return_value = mock_code_file + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + assert mock_create_file.called + assert mock_post.called + assert result.name == "test_code.py" + + # Verify post was called with correct data + mock_post.assert_called_once_with( + "/codeFiles", + json={ + "name": "test_code.py", + "fileUri": "/files/files/12345", + "type": "decisionPythonFile", + }, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile._validate_code_format_via_api") + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_with_validation( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_post, + mock_validate, + ): + """Test upload with validation enabled.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + code = "def execute():\n return 'test'" + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=True, + ) + + # Verify validation was called + mock_validate.assert_called_once_with(code) + assert result == mock_code_file + + def test_write_id_code_file_invalid_filename(self): + """Test that invalid file names are rejected.""" + code = """ +def execute(): + 'Output:result' + 
'DependentPackages:' + result = 42 +""" + + with pytest.raises(ValueError, match="file_name must end with .py"): + CodeFile.write_id_code_file( + code=code, file_name="test_code.txt", folder="/Public/TestFolder" + ) + + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_already_exists(self, mock_find_file, mock_get_folder): + """Test that uploading a file that already exists raises error.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + + mock_existing_file = mock.MagicMock() + mock_existing_file.id = "existing-file-id" + mock_existing_file.name = "duplicate.py" + mock_find_file.return_value = mock_existing_file + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + with pytest.raises( + ValueError, match="File 'duplicate.py' already exists in this folder" + ): + CodeFile.write_id_code_file( + code=code, + file_name="duplicate.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + @mock.patch("sasctl.services.folders.get_folder") + def test_write_id_code_file_folder_not_found(self, mock_get_folder): + """Test that referencing a non-existent folder raises error.""" + mock_get_folder.return_value = None + + code = """ +def execute(): + 'Output:result' + 'DependentPackages:' + result = 'test' + return result +""" + + with pytest.raises(ValueError, match="Folder '/NonExistent' not found"): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/NonExistent", + validate_code=False, + ) + + def test_write_id_code_file_empty_code(self): + """Test that empty code raises error.""" + with pytest.raises(ValueError, match="Code cannot be empty"): + CodeFile.write_id_code_file( + code="", + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + 
@mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_from_path( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test uploading code from a file path.""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f: + f.write("def execute():\n return 'test'") + temp_path = Path(f.name) + + try: + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + result = CodeFile.write_id_code_file( + code=temp_path, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + assert result == mock_code_file + mock_create_file.assert_called_once() + finally: + temp_path.unlink() + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.delete_file") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_post_fails_cleanup_success( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_delete_file, + mock_post, + ): + """Test that file is cleaned up when post fails.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.__getitem__ = mock.MagicMock(return_value="12345") + mock_create_file.return_value = mock_file_obj + + 
mock_post.side_effect = RuntimeError("API error") + + code = "def execute():\n return 'test'" + + with pytest.raises( + RuntimeError, + match="There was an error with creating the code file: API error", + ): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + # Verify cleanup was attempted + mock_delete_file.assert_called_once_with({"id": "12345"}) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.delete_file") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_post_fails_cleanup_fails( + self, + mock_find_file, + mock_get_folder, + mock_create_file, + mock_delete_file, + mock_post, + ): + """Test error handling when both post and cleanup fail.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_file_obj.__getitem__ = mock.MagicMock(return_value="12345") + mock_create_file.return_value = mock_file_obj + + mock_post.side_effect = RuntimeError("API error") + mock_delete_file.side_effect = RuntimeError("Delete failed") + + code = "def execute():\n return 'test'" + + with pytest.raises(RuntimeError): + CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder="/Public/TestFolder", + validate_code=False, + ) + + @mock.patch("sasctl.pzmm.code_file.CodeFile.post") + @mock.patch("sasctl.services.files.create_file") + @mock.patch("sasctl.services.folders.get_folder") + @mock.patch("sasctl.pzmm.code_file.CodeFile._find_file_in_folder") + def test_write_id_code_file_with_folder_object( + self, mock_find_file, mock_get_folder, mock_create_file, mock_post + ): + """Test uploading with folder object instead 
of path.""" + mock_folder_obj = mock.MagicMock() + mock_folder_obj.id = "folder-123" + mock_get_folder.return_value = mock_folder_obj + mock_find_file.return_value = None + + mock_file_obj = mock.MagicMock() + mock_file_obj.id = "12345" + mock_create_file.return_value = mock_file_obj + + mock_code_file = mock.MagicMock() + mock_post.return_value = mock_code_file + + code = "def execute():\n return 'test'" + folder_dict = {"id": "folder-123", "name": "TestFolder"} + + result = CodeFile.write_id_code_file( + code=code, + file_name="test_code.py", + folder=folder_dict, + validate_code=False, + ) + + assert result == mock_code_file + mock_get_folder.assert_called_once_with(folder_dict) diff --git a/tests/unit/test_misc_utils.py b/tests/unit/test_misc_utils.py index 939e6ef6..622b2ba1 100644 --- a/tests/unit/test_misc_utils.py +++ b/tests/unit/test_misc_utils.py @@ -14,7 +14,8 @@ def test_list_packages(): packages = installed_packages() # We know that these packages should always be present - assert any(re.match("requests==.*", p) for p in packages) + assert packages is not None + assert any( re.match("sasctl.*", p) for p in packages ) # sasctl may be installed from disk so no '=='