Añadiendo todos los archivos del proyecto (incluidos secretos y venv)
This commit is contained in:
44
venv/lib/python3.12/site-packages/mysql/ai/utils/__init__.py
Normal file
44
venv/lib/python3.12/site-packages/mysql/ai/utils/__init__.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# Copyright (c) 2025 Oracle and/or its affiliates.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License, version 2.0, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This program is designed to work with certain software (including
|
||||
# but not limited to OpenSSL) that is licensed under separate terms,
|
||||
# as designated in a particular file or component or in included license
|
||||
# documentation. The authors of MySQL hereby grant you an
|
||||
# additional permission to link the program and your derivative works
|
||||
# with the separately licensed software that they have either included with
|
||||
# the program or referenced in the documentation.
|
||||
#
|
||||
# Without limiting anything contained in the foregoing, this file,
|
||||
# which is part of MySQL Connector/Python, is also subject to the
|
||||
# Universal FOSS Exception, version 1.0, a copy of which can be found at
|
||||
# http://oss.oracle.com/licenses/universal-foss-exception.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the GNU General Public License, version 2.0, for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
"""Utilities for AI-related helpers in MySQL Connector/Python.
|
||||
|
||||
This package exposes:
|
||||
- check_dependencies(): runtime dependency guard for optional AI features
|
||||
- atomic_transaction(): context manager ensuring atomic DB transactions
|
||||
- utils: general-purpose helpers used by AI integrations
|
||||
|
||||
Importing this package validates base dependencies required for AI utilities.
|
||||
"""
|
||||
|
||||
from .dependencies import check_dependencies
|
||||
|
||||
check_dependencies(["BASE"])
|
||||
|
||||
from .atomic_cursor import atomic_transaction
|
||||
from .utils import *
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,94 @@
|
||||
# Copyright (c) 2025 Oracle and/or its affiliates.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License, version 2.0, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This program is designed to work with certain software (including
|
||||
# but not limited to OpenSSL) that is licensed under separate terms,
|
||||
# as designated in a particular file or component or in included license
|
||||
# documentation. The authors of MySQL hereby grant you an
|
||||
# additional permission to link the program and your derivative works
|
||||
# with the separately licensed software that they have either included with
|
||||
# the program or referenced in the documentation.
|
||||
#
|
||||
# Without limiting anything contained in the foregoing, this file,
|
||||
# which is part of MySQL Connector/Python, is also subject to the
|
||||
# Universal FOSS Exception, version 1.0, a copy of which can be found at
|
||||
# http://oss.oracle.com/licenses/universal-foss-exception.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the GNU General Public License, version 2.0, for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
"""Atomic transaction context manager utilities for MySQL Connector/Python.
|
||||
|
||||
Provides context manager atomic_transaction() that ensures commit on success
|
||||
and rollback on error without obscuring the original exception.
|
||||
"""
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Iterator
|
||||
|
||||
from mysql.connector.abstracts import MySQLConnectionAbstract
|
||||
from mysql.connector.cursor import MySQLCursorAbstract
|
||||
|
||||
|
||||
@contextmanager
|
||||
def atomic_transaction(
|
||||
conn: MySQLConnectionAbstract,
|
||||
) -> Iterator[MySQLCursorAbstract]:
|
||||
"""
|
||||
Context manager that wraps a MySQL database cursor and ensures transaction
|
||||
rollback in case of exception.
|
||||
|
||||
NOTE: DDL statements such as CREATE TABLE cause implicit commits. These cannot
|
||||
be managed by a cursor object. Changes made at or before a DDL statement will
|
||||
be committed and not rolled back. Callers are responsible for any cleanup of
|
||||
this type.
|
||||
|
||||
This class acts as a robust, PEP 343-compliant context manager for handling
|
||||
database cursor operations on a MySQL connection. It ensures that all operations
|
||||
executed within the context block are part of the same transaction, and
|
||||
automatically calls `connection.rollback()` if an exception occurs, helping
|
||||
to maintain database integrity. On normal completion (no exception), it simply
|
||||
closes the cursor after use. Exceptions are always propagated to the caller.
|
||||
|
||||
Args:
|
||||
conn: A MySQLConnectionAbstract instance.
|
||||
"""
|
||||
old_autocommit = conn.autocommit
|
||||
cursor = conn.cursor()
|
||||
|
||||
exception_raised = False
|
||||
try:
|
||||
if old_autocommit:
|
||||
conn.autocommit = False
|
||||
|
||||
yield cursor # provide cursor to block
|
||||
|
||||
conn.commit()
|
||||
except Exception: # pylint: disable=broad-exception-caught
|
||||
exception_raised = True
|
||||
try:
|
||||
conn.rollback()
|
||||
except Exception: # pylint: disable=broad-exception-caught
|
||||
# Don't obscure original exception
|
||||
pass
|
||||
|
||||
# Raise original exception
|
||||
raise
|
||||
finally:
|
||||
conn.autocommit = old_autocommit
|
||||
|
||||
try:
|
||||
cursor.close()
|
||||
except Exception: # pylint: disable=broad-exception-caught
|
||||
# don't obscure original exception if exists
|
||||
if not exception_raised:
|
||||
raise
|
||||
@@ -0,0 +1,88 @@
|
||||
# Copyright (c) 2025 Oracle and/or its affiliates.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License, version 2.0, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This program is designed to work with certain software (including
|
||||
# but not limited to OpenSSL) that is licensed under separate terms,
|
||||
# as designated in a particular file or component or in included license
|
||||
# documentation. The authors of MySQL hereby grant you an
|
||||
# additional permission to link the program and your derivative works
|
||||
# with the separately licensed software that they have either included with
|
||||
# the program or referenced in the documentation.
|
||||
#
|
||||
# Without limiting anything contained in the foregoing, this file,
|
||||
# which is part of MySQL Connector/Python, is also subject to the
|
||||
# Universal FOSS Exception, version 1.0, a copy of which can be found at
|
||||
# http://oss.oracle.com/licenses/universal-foss-exception.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the GNU General Public License, version 2.0, for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
|
||||
"""Dependency checking utilities for AI features in MySQL Connector/Python.
|
||||
|
||||
Provides check_dependencies() to assert required optional packages are present
|
||||
with acceptable minimum versions at runtime.
|
||||
"""
|
||||
|
||||
import importlib.metadata
|
||||
|
||||
from typing import List
|
||||
|
||||
|
||||
def check_dependencies(tasks: List[str]) -> None:
|
||||
"""
|
||||
Check required runtime dependencies and minimum versions; raise an error
|
||||
if any are missing or version-incompatible.
|
||||
|
||||
This verifies the presence and minimum version of essential Python packages.
|
||||
Missing or insufficient versions cause an ImportError listing the packages
|
||||
and a suggested install command.
|
||||
|
||||
Args:
|
||||
tasks (List[str]): Task types to check requirements for.
|
||||
|
||||
Raises:
|
||||
ImportError: If any required dependencies are missing or below the
|
||||
minimum version.
|
||||
"""
|
||||
task_set = set(tasks)
|
||||
task_set.add("BASE")
|
||||
|
||||
# Requirements: (import_name, min_version)
|
||||
task_to_requirement = {
|
||||
"BASE": [("pandas", "1.5.0")],
|
||||
"GENAI": [
|
||||
("langchain", "0.1.11"),
|
||||
("langchain_core", "0.1.11"),
|
||||
("pydantic", "1.10.0"),
|
||||
],
|
||||
"ML": [("scikit-learn", "1.3.0")],
|
||||
}
|
||||
requirements = []
|
||||
for task in task_set:
|
||||
requirements.extend(task_to_requirement[task])
|
||||
requirements_set = set(requirements)
|
||||
|
||||
problems = []
|
||||
for name, min_version in requirements_set:
|
||||
try:
|
||||
installed_version = importlib.metadata.version(name)
|
||||
# Version comparison uses simple string comparison to avoid extra
|
||||
# dependencies. This is valid for the dependencies defined above;
|
||||
# reconsider if adding packages with version schemes that do not
|
||||
# compare correctly as strings.
|
||||
error = installed_version < min_version
|
||||
except importlib.metadata.PackageNotFoundError:
|
||||
error = True
|
||||
if error:
|
||||
problems.append(f"{name} v{min_version} (or later)")
|
||||
if problems:
|
||||
raise ImportError("Please install " + ", ".join(problems) + ".")
|
||||
573
venv/lib/python3.12/site-packages/mysql/ai/utils/utils.py
Normal file
573
venv/lib/python3.12/site-packages/mysql/ai/utils/utils.py
Normal file
@@ -0,0 +1,573 @@
|
||||
# Copyright (c) 2025 Oracle and/or its affiliates.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License, version 2.0, as
|
||||
# published by the Free Software Foundation.
|
||||
#
|
||||
# This program is designed to work with certain software (including
|
||||
# but not limited to OpenSSL) that is licensed under separate terms,
|
||||
# as designated in a particular file or component or in included license
|
||||
# documentation. The authors of MySQL hereby grant you an
|
||||
# additional permission to link the program and your derivative works
|
||||
# with the separately licensed software that they have either included with
|
||||
# the program or referenced in the documentation.
|
||||
#
|
||||
# Without limiting anything contained in the foregoing, this file,
|
||||
# which is part of MySQL Connector/Python, is also subject to the
|
||||
# Universal FOSS Exception, version 1.0, a copy of which can be found at
|
||||
# http://oss.oracle.com/licenses/universal-foss-exception.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# See the GNU General Public License, version 2.0, for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
"""General utilities for AI features in MySQL Connector/Python.
|
||||
|
||||
Includes helpers for:
|
||||
- defensive dict copying
|
||||
- temporary table lifecycle management
|
||||
- SQL execution and result conversions
|
||||
- DataFrame to/from SQL table utilities
|
||||
- schema/table/column name validation
|
||||
- array-like to DataFrame conversion
|
||||
"""
|
||||
|
||||
import copy
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import string
|
||||
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from mysql.ai.utils.atomic_cursor import atomic_transaction
|
||||
|
||||
from mysql.connector.abstracts import MySQLConnectionAbstract
|
||||
from mysql.connector.cursor import MySQLCursorAbstract
|
||||
from mysql.connector.types import ParamsSequenceOrDictType
|
||||
|
||||
VAR_NAME_SPACE = "mysql_ai"
|
||||
RANDOM_TABLE_NAME_LENGTH = 32
|
||||
|
||||
PD_TO_SQL_DTYPE_MAPPING = {
|
||||
"int64": "BIGINT",
|
||||
"float64": "DOUBLE",
|
||||
"object": "LONGTEXT",
|
||||
"bool": "BOOLEAN",
|
||||
"datetime64[ns]": "DATETIME",
|
||||
}
|
||||
|
||||
DEFAULT_SCHEMA = "mysql_ai"
|
||||
|
||||
# Misc Utilities
|
||||
|
||||
|
||||
def copy_dict(options: Optional[dict]) -> dict:
|
||||
"""
|
||||
Make a defensive copy of a dictionary, or return an empty dict if None.
|
||||
|
||||
Args:
|
||||
options: param dict or None
|
||||
|
||||
Returns:
|
||||
dict
|
||||
"""
|
||||
if options is None:
|
||||
return {}
|
||||
|
||||
return copy.deepcopy(options)
|
||||
|
||||
|
||||
@contextmanager
|
||||
def temporary_sql_tables(
|
||||
db_connection: MySQLConnectionAbstract,
|
||||
) -> Iterator[list[tuple[str, str]]]:
|
||||
"""
|
||||
Context manager to track and automatically clean up temporary SQL tables.
|
||||
|
||||
Args:
|
||||
db_connection: Database connection object used to create and delete tables.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
|
||||
Yields:
|
||||
temporary_tables: List of (schema_name, table_name) tuples created during the
|
||||
context. All tables in this list are deleted on context exit.
|
||||
"""
|
||||
temporary_tables: List[Tuple[str, str]] = []
|
||||
try:
|
||||
yield temporary_tables
|
||||
finally:
|
||||
with atomic_transaction(db_connection) as cursor:
|
||||
for schema_name, table_name in temporary_tables:
|
||||
delete_sql_table(cursor, schema_name, table_name)
|
||||
|
||||
|
||||
def execute_sql(
|
||||
cursor: MySQLCursorAbstract, query: str, params: ParamsSequenceOrDictType = None
|
||||
) -> None:
|
||||
"""
|
||||
Execute an SQL query with optional parameters using the given cursor.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract object to execute the query.
|
||||
query: SQL query string to execute.
|
||||
params: Optional sequence or dict providing parameters for the query.
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If the provided SQL query/params are invalid
|
||||
If the query is valid but the sql raises as an exception
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
cursor.execute(query, params or ())
|
||||
|
||||
|
||||
def _get_name() -> str:
|
||||
"""
|
||||
Generate a random uppercase string of fixed length for table names.
|
||||
|
||||
Returns:
|
||||
Random string of length RANDOM_TABLE_NAME_LENGTH.
|
||||
"""
|
||||
char_set = string.ascii_uppercase
|
||||
return "".join(random.choices(char_set, k=RANDOM_TABLE_NAME_LENGTH))
|
||||
|
||||
|
||||
def get_random_name(condition: Callable[[str], bool], max_calls: int = 100) -> str:
|
||||
"""
|
||||
Generate a random string name that satisfies a given condition.
|
||||
|
||||
Args:
|
||||
condition: Callable that takes a generated name and returns True if it is valid.
|
||||
max_calls: Maximum number of attempts before giving up (default 100).
|
||||
|
||||
Returns:
|
||||
A random string that fulfills the provided condition.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If the maximum number of attempts is reached without success.
|
||||
"""
|
||||
for _ in range(max_calls):
|
||||
if condition(name := _get_name()):
|
||||
return name
|
||||
# condition never met
|
||||
raise RuntimeError("Reached max tries without successfully finding a unique name")
|
||||
|
||||
|
||||
# Format conversions
|
||||
|
||||
|
||||
def format_value_sql(value: Any) -> Tuple[str, List[Any]]:
|
||||
"""
|
||||
Convert a Python value into its SQL-compatible string representation and parameters.
|
||||
|
||||
Args:
|
||||
value: The value to format.
|
||||
|
||||
Returns:
|
||||
Tuple containing:
|
||||
- A string for substitution into a SQL query.
|
||||
- A list of parameters to be bound into the query.
|
||||
"""
|
||||
if isinstance(value, (dict, list)):
|
||||
if len(value) == 0:
|
||||
return "%s", [None]
|
||||
return "CAST(%s as JSON)", [json.dumps(value)]
|
||||
return "%s", [value]
|
||||
|
||||
|
||||
def sql_response_to_df(cursor: MySQLCursorAbstract) -> pd.DataFrame:
|
||||
"""
|
||||
Convert the results of a cursor's last executed query to a pandas DataFrame.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract with a completed query.
|
||||
|
||||
Returns:
|
||||
DataFrame with data from the cursor.
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
If a compatible SELECT query wasn't the last statement ran
|
||||
"""
|
||||
|
||||
def _json_processor(elem: Optional[str]) -> Optional[dict]:
|
||||
return json.loads(elem) if elem is not None else None
|
||||
|
||||
def _default_processor(elem: Any) -> Any:
|
||||
return elem
|
||||
|
||||
idx_to_processor = {}
|
||||
for idx, col in enumerate(cursor.description):
|
||||
if col[1] == 245:
|
||||
# 245 is the MySQL type code for JSON
|
||||
idx_to_processor[idx] = _json_processor
|
||||
else:
|
||||
idx_to_processor[idx] = _default_processor
|
||||
|
||||
rows = cursor.fetchall()
|
||||
|
||||
# Process results
|
||||
processed_rows = []
|
||||
for row in rows:
|
||||
processed_row = list(row)
|
||||
|
||||
for idx, elem in enumerate(row):
|
||||
processed_row[idx] = idx_to_processor[idx](elem)
|
||||
|
||||
processed_rows.append(processed_row)
|
||||
|
||||
return pd.DataFrame(processed_rows, columns=cursor.column_names)
|
||||
|
||||
|
||||
def sql_table_to_df(
|
||||
cursor: MySQLCursorAbstract, schema_name: str, table_name: str
|
||||
) -> pd.DataFrame:
|
||||
"""
|
||||
Load the entire contents of a SQL table into a pandas DataFrame.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract to execute the query.
|
||||
schema_name: Name of the schema containing the table.
|
||||
table_name: Name of the table to fetch.
|
||||
|
||||
Returns:
|
||||
DataFrame containing all rows from the specified table.
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If the table does not exist
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
ValueError: If the schema or table name is not valid
|
||||
"""
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
|
||||
execute_sql(cursor, f"SELECT * FROM {schema_name}.{table_name}")
|
||||
return sql_response_to_df(cursor)
|
||||
|
||||
|
||||
# Table operations
|
||||
|
||||
|
||||
def table_exists(
|
||||
cursor: MySQLCursorAbstract, schema_name: str, table_name: str
|
||||
) -> bool:
|
||||
"""
|
||||
Check whether a table exists in a specific schema.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract object to execute the query.
|
||||
schema_name: Name of the database schema.
|
||||
table_name: Name of the table.
|
||||
|
||||
Returns:
|
||||
True if the table exists, False otherwise.
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
ValueError: If the schema or table name is not valid
|
||||
"""
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT 1
|
||||
FROM information_schema.tables
|
||||
WHERE table_schema = %s AND table_name = %s
|
||||
LIMIT 1
|
||||
""",
|
||||
(schema_name, table_name),
|
||||
)
|
||||
return cursor.fetchone() is not None
|
||||
|
||||
|
||||
def delete_sql_table(
|
||||
cursor: MySQLCursorAbstract, schema_name: str, table_name: str
|
||||
) -> None:
|
||||
"""
|
||||
Drop a table from the SQL database if it exists.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract to execute the drop command.
|
||||
schema_name: Name of the schema.
|
||||
table_name: Name of the table to delete.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
ValueError: If the schema or table name is not valid
|
||||
"""
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
|
||||
execute_sql(cursor, f"DROP TABLE IF EXISTS {schema_name}.{table_name}")
|
||||
|
||||
|
||||
def extend_sql_table(
|
||||
cursor: MySQLCursorAbstract,
|
||||
schema_name: str,
|
||||
table_name: str,
|
||||
df: pd.DataFrame,
|
||||
col_name_to_placeholder_string: Dict[str, str] = None,
|
||||
) -> None:
|
||||
"""
|
||||
Insert all rows from a pandas DataFrame into an existing SQL table.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract for execution.
|
||||
schema_name: Name of the database schema.
|
||||
table_name: Table to insert new rows into.
|
||||
df: DataFrame containing the rows to insert.
|
||||
col_name_to_placeholder_string:
|
||||
Optional mapping of column names to custom SQL value/placeholder
|
||||
strings.
|
||||
|
||||
Returns:
|
||||
None
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If the rows could not be inserted into the table, e.g., a type or shape issue
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
ValueError: If the schema or table name is not valid
|
||||
"""
|
||||
if col_name_to_placeholder_string is None:
|
||||
col_name_to_placeholder_string = {}
|
||||
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
for col in df.columns:
|
||||
validate_name(str(col))
|
||||
|
||||
qualified_table_name = f"{schema_name}.{table_name}"
|
||||
|
||||
# Iterate over all rows in the DataFrame to build insert statements row by row
|
||||
for row in df.values:
|
||||
placeholders, params = [], []
|
||||
for elem, col in zip(row, df.columns):
|
||||
elem = elem.item() if hasattr(elem, "item") else elem
|
||||
|
||||
if col in col_name_to_placeholder_string:
|
||||
elem_placeholder, elem_params = col_name_to_placeholder_string[col], [
|
||||
str(elem)
|
||||
]
|
||||
else:
|
||||
elem_placeholder, elem_params = format_value_sql(elem)
|
||||
|
||||
placeholders.append(elem_placeholder)
|
||||
params.extend(elem_params)
|
||||
|
||||
cols_sql = ", ".join([str(col) for col in df.columns])
|
||||
placeholders_sql = ", ".join(placeholders)
|
||||
insert_sql = (
|
||||
f"INSERT INTO {qualified_table_name} "
|
||||
f"({cols_sql}) VALUES ({placeholders_sql})"
|
||||
)
|
||||
execute_sql(cursor, insert_sql, params=params)
|
||||
|
||||
|
||||
def sql_table_from_df(
|
||||
cursor: MySQLCursorAbstract, schema_name: str, df: pd.DataFrame
|
||||
) -> Tuple[str, str]:
|
||||
"""
|
||||
Create a new SQL table with a random name, and populate it with data from a DataFrame.
|
||||
|
||||
If an 'id' column is defined in the dataframe, it will be used as the primary key.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract for executing SQL.
|
||||
schema_name: Schema in which to create the table.
|
||||
df: DataFrame containing the data to be inserted.
|
||||
|
||||
Returns:
|
||||
Tuple (qualified_table_name, table_name): The schema-qualified and
|
||||
unqualified table names.
|
||||
|
||||
Raises:
|
||||
RuntimeError: If a random available table name could not be found.
|
||||
ValueError: If any schema, table, or a column name is invalid.
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
"""
|
||||
table_name = get_random_name(
|
||||
lambda table_name: not table_exists(cursor, schema_name, table_name)
|
||||
)
|
||||
qualified_table_name = f"{schema_name}.{table_name}"
|
||||
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
for col in df.columns:
|
||||
validate_name(str(col))
|
||||
|
||||
columns_sql = []
|
||||
for col, dtype in df.dtypes.items():
|
||||
# Map pandas dtype to SQL type, fallback is VARCHAR
|
||||
sql_type = PD_TO_SQL_DTYPE_MAPPING.get(str(dtype), "LONGTEXT")
|
||||
validate_name(str(col))
|
||||
columns_sql.append(f"{col} {sql_type}")
|
||||
|
||||
columns_str = ", ".join(columns_sql)
|
||||
|
||||
has_id_col = any(col.lower() == "id" for col in df.columns)
|
||||
if has_id_col:
|
||||
columns_str += ", PRIMARY KEY (id)"
|
||||
|
||||
# Create table with generated columns
|
||||
create_table_sql = f"CREATE TABLE {qualified_table_name} ({columns_str})"
|
||||
execute_sql(cursor, create_table_sql)
|
||||
|
||||
try:
|
||||
# Insert provided data into new table
|
||||
extend_sql_table(cursor, schema_name, table_name, df)
|
||||
except Exception: # pylint: disable=broad-exception-caught
|
||||
# Delete table before we lose access to it
|
||||
delete_sql_table(cursor, schema_name, table_name)
|
||||
raise
|
||||
return qualified_table_name, table_name
|
||||
|
||||
|
||||
def validate_name(name: str) -> str:
|
||||
"""
|
||||
Validate that the string is a legal SQL identifier (letters, digits, underscores).
|
||||
|
||||
Args:
|
||||
name: Name (schema, table, or column) to validate.
|
||||
|
||||
Returns:
|
||||
The validated name.
|
||||
|
||||
Raises:
|
||||
ValueError: If the name does not meet format requirements.
|
||||
"""
|
||||
# Accepts only letters, digits, and underscores; change as needed
|
||||
if not (isinstance(name, str) and re.match(r"^[A-Za-z0-9_]+$", name)):
|
||||
raise ValueError(f"Unsupported name format {name}")
|
||||
|
||||
return name
|
||||
|
||||
|
||||
def source_schema(db_connection: MySQLConnectionAbstract) -> str:
|
||||
"""
|
||||
Retrieve the name of the currently selected schema, or set and ensure the default schema.
|
||||
|
||||
Args:
|
||||
db_connection: MySQL connector database connection object.
|
||||
|
||||
Returns:
|
||||
Name of the schema (database in use).
|
||||
|
||||
Raises:
|
||||
ValueError: If the schema name is not valid
|
||||
DatabaseError:
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
"""
|
||||
schema = db_connection.database
|
||||
if schema is None:
|
||||
schema = DEFAULT_SCHEMA
|
||||
|
||||
with atomic_transaction(db_connection) as cursor:
|
||||
create_database_stmt = f"CREATE DATABASE IF NOT EXISTS {schema}"
|
||||
execute_sql(cursor, create_database_stmt)
|
||||
|
||||
validate_name(schema)
|
||||
|
||||
return schema
|
||||
|
||||
|
||||
def is_table_empty(
|
||||
cursor: MySQLCursorAbstract, schema_name: str, table_name: str
|
||||
) -> bool:
|
||||
"""
|
||||
Determine if a given SQL table is empty.
|
||||
|
||||
Args:
|
||||
cursor: MySQLCursorAbstract with access to the database.
|
||||
schema_name: Name of the schema containing the table.
|
||||
table_name: Name of the table to check.
|
||||
|
||||
Returns:
|
||||
True if the table has no rows, False otherwise.
|
||||
|
||||
Raises:
|
||||
DatabaseError:
|
||||
If the table does not exist
|
||||
If a database connection issue occurs.
|
||||
If an operational error occurs during execution.
|
||||
ValueError: If the schema or table name is not valid
|
||||
"""
|
||||
validate_name(schema_name)
|
||||
validate_name(table_name)
|
||||
|
||||
cursor.execute(f"SELECT 1 FROM {schema_name}.{table_name} LIMIT 1")
|
||||
return cursor.fetchone() is None
|
||||
|
||||
|
||||
def convert_to_df(
|
||||
arr: Optional[Union[pd.DataFrame, pd.Series, np.ndarray]],
|
||||
col_prefix: str = "feature",
|
||||
) -> Optional[pd.DataFrame]:
|
||||
"""
|
||||
Convert input data to a pandas DataFrame if necessary.
|
||||
|
||||
Args:
|
||||
arr: Input data as a pandas DataFrame, NumPy ndarray, pandas Series, or None.
|
||||
|
||||
Returns:
|
||||
If the input is None, returns None.
|
||||
Otherwise, returns a DataFrame backed by the same underlying data whenever
|
||||
possible (except in cases where pandas or NumPy must copy, such as for
|
||||
certain views or non-contiguous arrays).
|
||||
|
||||
Notes:
|
||||
- If an ndarray is passed, column names will be integer indices (0, 1, ...).
|
||||
- If a DataFrame is passed, column names and indices are preserved.
|
||||
- The returned DataFrame is a shallow copy and shares data with the original
|
||||
input when possible; however, copies may still occur for certain input
|
||||
types or memory layouts.
|
||||
"""
|
||||
if arr is None:
|
||||
return None
|
||||
|
||||
if isinstance(arr, pd.DataFrame):
|
||||
return pd.DataFrame(arr)
|
||||
if isinstance(arr, pd.Series):
|
||||
return arr.to_frame()
|
||||
|
||||
if arr.ndim == 1:
|
||||
arr = arr.reshape(-1, 1)
|
||||
col_names = [f"{col_prefix}_{idx}" for idx in range(arr.shape[1])]
|
||||
|
||||
return pd.DataFrame(arr, columns=col_names, copy=False)
|
||||
Reference in New Issue
Block a user