o
    =DisH                     @   s   d Z ddlZddlmZmZmZmZmZmZ ddl	Z
ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZ dd
l m!Z! dZ"dZ#e dZ$e dZ%e dZ&e dZ'e dZ(dZ)G dd deZ*dS )zMySQL-backed vector store for embeddings and semantic document retrieval.

Provides a VectorStore implementation persisting documents, metadata, and
embeddings in MySQL, plus similarity search utilities.
    N)AnyIterableListOptionalSequenceUnionDocument)
Embeddings)VectorStore)PrivateAttr)MyEmbeddings)
VAR_NAME_SPACEatomic_transactiondelete_sql_tableexecute_sqlextend_sql_tableformat_value_sqlget_random_nameis_table_emptysource_schematable_exists)MySQLConnectionAbstractzHello world!external_sourcez
.embeddingz.contextz.context_mapz.retrieval_infoz.optionsinternal_ai_id_c                       s  e Zd ZU dZe Zeed< e Ze	ed< e Z
eed< e Zee ed< e Zeed< e Zeed< 	d0d	ed
ee	 ddf fddZdedee fddZd1ddZd0deee  deddfddZd1ddZ		d2dee deee  deee  dedee f
ddZe		d2dee d
e	deee  d	edef
ddZ	d0dee  dee dee fd d!Z!	"d3d#ed$ed%edee  fd&d'Z"d4d)d*Z#d+e$e%df d,e$e&df d-e$e'df ddfd.d/Z(  Z)S )5MyVectorStorea  
    MySQL-backed vector store for handling embeddings and semantic document retrieval.

    Supports adding, deleting, and searching high-dimensional vector representations
    of documents using efficient storage and HeatWave ML similarity search procedures.

    Supports use as a context manager: when used in a `with` statement, all backing
    tables/data are deleted automatically when the block exits (even on exception).

    Attributes:
        db_connection (MySQLConnectionAbstract): Active MySQL database connection.
        embedder (Embeddings): Embeddings generator for computing vector representations.
        schema_name (str): SQL schema for table storage.
        table_name (Optional[str]): Name of the active table backing the store
            (or None until created).
        embedding_dimension (int): Size of embedding vectors stored.
        next_id (int): Internal counter for unique document ID generation.
    _db_connection	_embedder_schema_name_table_name_embedding_dimension_next_idNdb_connectionembedderreturnc                    sJ   t    d| _t|| _|pt|| _|| _d| _t	| j
t| _dS )a  
        Initialize a MyVectorStore with a database connection and embedding generator.

        Args:
            db_connection: MySQL database connection for all vector operations.
            embedder: Embeddings generator used for creating and querying embeddings.

        Raises:
            ValueError: If the schema name is not valid
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        r   N)super__init__r!   r   r   r   r   r   r   lenembed_queryBASIC_EMBEDDING_QUERYr    )selfr"   r#   	__class__ T/var/www/Datamplify/venv/lib/python3.10/site-packages/mysql/ai/genai/vector_store.pyr&   d   s   



zMyVectorStore.__init__num_idsc                 C   s.   dd t | j| j| D }|  j|7  _|S )z
        Generate a batch of unique internal document IDs for vector storage.

        Args:
            num_ids: Number of IDs to create.

        Returns:
            List of sequentially numbered internal string IDs.
        c                 S   s   g | ]}d | qS )r   r-   ).0ir-   r-   r.   
<listcomp>   s    
z*MyVectorStore._get_ids.<locals>.<listcomp>)ranger!   )r*   r/   idsr-   r-   r.   _get_ids   s
   
zMyVectorStore._get_idsc                    s|   j du r<tj( t fdd}dj d| d}t |jfd |_ W d   dS 1 s5w   Y  dS dS )a  
        Create a backing SQL table for storing vectors if not already created.

        Returns:
            None

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            The table name is randomized to avoid collisions.
            Schema includes content, metadata, and embedding vector.
        Nc                    s   t  j|  S N)r   r   )
table_namecursorr*   r-   r.   <lambda>   s    z2MyVectorStore._make_vector_store.<locals>.<lambda>z
                CREATE TABLE .a	   (
                    `id` VARCHAR(128) NOT NULL,
                    `content` TEXT,
                    `metadata` JSON DEFAULT NULL,
                    `embed` vector(%s),
                    PRIMARY KEY (`id`)
                ) ENGINE=InnoDB;
                params)r   r   r   r   r   r   r    )r*   r7   create_table_stmtr-   r8   r.   _make_vector_store   s    
	
"z MyVectorStore._make_vector_storer4   _c              	   K   s   t | j5}|r|D ]}t|d| j d| j d|fd q
t|| j| jr2|   W d   dS W d   dS 1 s=w   Y  dS )a#  
        Delete documents by ID. Optionally deletes the vector table if empty after deletions.

        Args:
            ids: Optional sequence of document IDs to delete. If None, no action is taken.

        Returns:
            None

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            If the backing table is empty after deletions, the table is dropped and
            table_name is set to None.
        zDELETE FROM r;    WHERE id = %sr<   N)r   r   r   r   r   r   
delete_all)r*   r4   r@   r9   _idr-   r-   r.   delete   s   
	"zMyVectorStore.deletec                 C   sT   | j dur(t| j}t|| j| j  d| _ W d   dS 1 s!w   Y  dS dS )zc
        Delete and drop the entire vector store table.

        Returns:
            None
        N)r   r   r   r   r   )r*   r9   r-   r-   r.   rB      s   
"zMyVectorStore.delete_alltexts	metadatasc                 K   s8   t |}dd t||pi gt| D }| j||dS )ap  
        Add a batch of text strings and corresponding metadata to the vector store.

        Args:
            texts: List of strings to embed and store.
            metadatas: Optional list of metadata dicts (one per text).
            ids: Optional custom document IDs.

        Returns:
            List of document IDs corresponding to the added texts.

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            If metadatas is None, an empty dict is assigned to each document.
        c                 S   s   g | ]
\}}t ||d qS ))page_contentmetadatar   )r0   textmetar-   r-   r.   r2     s    
z+MyVectorStore.add_texts.<locals>.<listcomp>)r4   )listzipr'   add_documents)r*   rE   rF   r4   r@   	documentsr-   r-   r.   	add_texts   s
   zMyVectorStore.add_textsc                 C   s6   |du rt dt|}| ||d}|j||d |S )a  
        Construct and populate a MyVectorStore instance from raw texts and metadata.

        Args:
            texts: List of strings to vectorize and store.
            embedder: Embeddings generator to use.
            metadatas: Optional list of metadata dicts per text.
            db_connection: Active MySQL connection.

        Returns:
            Instance of MyVectorStore containing the added texts.

        Raises:
            ValueError: If db_connection is not provided.
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        Nz@db_connection must be specified to create a MyVectorStore object)r"   r#   )rF   )
ValueErrorrK   rO   )clsrE   r#   rF   r"   instancer-   r-   r.   
from_texts  s   zMyVectorStore.from_textsrN   c                 C   s   |rt |t |krdt | dt | d}t|t |dkr&|   ng S |du r3| t |}dd |D }| j|}t }||d< ||d	< ||d
< dd |D |d< t| j	}t
|| j| j|d
did W d   |S 1 sww   Y  |S )a  
        Embed and store Document objects as high-dimensional vectors with metadata.

        Args:
            documents: List of Document objects (each with 'page_content' and 'metadata').
            ids: Optional list of explicit document IDs. Must match the length of documents.

        Returns:
            List of document IDs stored.

        Raises:
            ValueError: If provided IDs do not match the number of documents.
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            Automatically creates the backing table if it does not exist.
        z.ids must be the same length as documents. Got z	 ids and z documents.r   Nc                 S      g | ]}|j qS r-   )rG   r0   docr-   r-   r.   r2   S      z/MyVectorStore.add_documents.<locals>.<listcomp>idcontentembedc                 S   rT   r-   )rH   rU   r-   r-   r.   r2   Z  rW   rH   zstring_to_vector(%s))col_name_to_placeholder_string)r'   rP   r?   r5   r   embed_documentspd	DataFramer   r   r   r   r   )r*   rN   r4   msgrY   vectorsdfr9   r-   r-   r.   rM   .  s@   

		zMyVectorStore.add_documents   querykkwargsc                 K   s  | j du rg S | j|}t| j}t|dt dt|gd |dd}|dd|d	d
|ddd}t	|\}}	dt d| j
 d| j  d| d| dt dt dt d}
t||
|g|	d t|dt  g }t| d }|D ]8}t|d| j
 d| j  d|d fd | \}}}||d}|durt||d< tdi |}|| q{|W  d   S 1 sw   Y  dS )a  
        Search for and return the most similar documents in the store to the given query.

        Args:
            query: String query to embed and use for similarity search.
            k: Number of top documents to return.
            kwargs: options to pass to ML_SIMILARITY_SEARCH. Currently supports
                distance_metric, max_distance, percentage_distance, and segment_overlap

        Returns:
            List of Document objects, ordered from most to least similar.

        Raises:
            DatabaseError:
                If provided kwargs are invalid or unsupported.
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Implementation Notes:
            - Calls ML similarity search within MySQL using stored procedures.
            - Retrieves IDs, content, and metadata for search matches.
            - Parsing and retrieval for context results are handled via intermediate JSONs.
        NzSET @z = string_to_vector(%s)r<   distance_metricCOSINEmax_distanceg333333?percentage_distanceg      4@segment_overlapr   )rh   ri   rj   z=
            CALL sys.ML_SIMILARITY_SEARCH(
                @z3,
                JSON_ARRAY(
                    'r;   z'
                ),
                JSON_OBJECT(
                    "segment", "content",
                    "segment_embedding", "embed",
                    "document_name", "id"
                ),
                zR,
                %s,
                NULL,
                NULL,
                z,
                @z
            )
            zSELECT @z"SELECT id, content, metadata FROM rA   document_name)rX   rG   rH   r-   )r   r   r(   r   r   r   VAR_EMBEDDINGstrgetr   r   VAR_CONTEXTVAR_CONTEXT_MAPVAR_RETRIEVAL_INFOjsonloadsfetchoner	   append)r*   rc   rd   re   	embeddingr9   rf   retrieval_optionsretrieval_options_placeholderretrieval_options_paramssimilarity_search_queryresultscontext_mapscontextdoc_idrY   rH   doc_argsrV   r-   r-   r.   similarity_searchg  s|   




	$zMyVectorStore.similarity_searchr   c                 C   s   | S )a  
        Enter the runtime context related to this vector store instance.

        Returns:
            The current MyVectorStore object, allowing use within a `with` statement block.

        Usage Notes:
            - Intended for use in a `with` statement to ensure automatic
              cleanup of resources.
            - No special initialization occurs during context entry, but enables
              proper context-managed lifecycle.

        Example:
            with MyVectorStore(db_connection, embedder) as vectorstore:
                vectorstore.add_texts([...])
                # Vector store is active within this block.
            # All storage and resources are now cleaned up.
        r-   )r*   r-   r-   r.   	__enter__  s   zMyVectorStore.__enter__exc_typeexc_valexc_tbc                 C   s   |    dS )a  
        Exit the runtime context for the vector store, ensuring all storage
        resources are cleaned up.

        Args:
            exc_type: The exception type, if any exception occurred in the context block.
            exc_val: The exception value, if any exception occurred in the context block.
            exc_tb:  The traceback object, if any exception occurred in the context block.

        Returns:
            None: Indicates that exceptions are never suppressed; they will propagate as normal.

        Implementation Notes:
            - Automatically deletes all vector store data and backing tables via `delete_all()`
            upon exiting the context.
            - This cleanup occurs whether the block exits normally or due to an exception.
            - Does not suppress exceptions; errors in the context block will continue to propagate.
            - Use when the vector store lifecycle is intended to be temporary or scoped.

        Example:
            with MyVectorStore(db_connection, embedder) as vectorstore:
                vectorstore.add_texts([...])
                # Vector store is active within this block.
            # All storage and resources are now cleaned up.
        N)rB   )r*   r   r   r   r-   r-   r.   __exit__  s   zMyVectorStore.__exit__r6   )r$   N)NN)rb   )r$   r   )*__name__
__module____qualname____doc__r   r   r   __annotations__r   r
   r   rm   r   r   r    intr!   r&   rK   r5   r?   r   r   rD   rB   r   dictr   rO   classmethodr   rS   r	   rM   r   r   r   typeBaseExceptionobjectr   __classcell__r-   r-   r+   r.   r   I   s   
  
 '



"
&
<

l


r   )+r   rr   typingr   r   r   r   r   r   pandasr]   langchain_core.documentsr	   langchain_core.embeddingsr
   langchain_core.vectorstoresr   pydanticr   mysql.ai.genai.embeddingr   mysql.ai.utilsr   r   r   r   r   r   r   r   r   r   mysql.connector.abstractsr   r)   EMBEDDING_SOURCErl   ro   rp   rq   VAR_OPTIONSID_SPACEr   r-   r-   r-   r.   <module>   s(    0




