
    3isH                        d Z ddlZddlmZmZmZmZmZmZ ddl	Z
ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZmZmZmZmZmZmZmZ dd
l m!Z! dZ"dZ#e dZ$e dZ%e dZ&e dZ'e dZ(dZ) G d de      Z*y)zMySQL-backed vector store for embeddings and semantic document retrieval.

Provides a VectorStore implementation persisting documents, metadata, and
embeddings in MySQL, plus similarity search utilities.
    N)AnyIterableListOptionalSequenceUnion)Document)
Embeddings)VectorStore)PrivateAttr)MyEmbeddings)
VAR_NAME_SPACEatomic_transactiondelete_sql_tableexecute_sqlextend_sql_tableformat_value_sqlget_random_nameis_table_emptysource_schematable_exists)MySQLConnectionAbstractzHello world!external_sourcez
.embeddingz.contextz.context_mapz.retrieval_infoz.optionsinternal_ai_id_c                   (    e Zd ZU dZ e       Zeed<    e       Ze	ed<    e       Z
eed<    e       Zee   ed<    e       Zeed<    e       Zeed<   	 d#d	ed
ee	   ddf fdZdedee   fdZd$dZd#deee      deddfdZd$dZ	 	 d%dee   deee      deee      dedee   f
dZe	 	 d%dee   d
e	deee      d	edef
d       Z	 d#dee    dee   dee   fdZ!	 d&dedededee    fdZ"d'dZ#de$e%df   d e$e&df   d!e$e'df   ddfd"Z( xZ)S )(MyVectorStorea  
    MySQL-backed vector store for handling embeddings and semantic document retrieval.

    Supports adding, deleting, and searching high-dimensional vector representations
    of documents using efficient storage and HeatWave ML similarity search procedures.

    Supports use as a context manager: when used in a `with` statement, all backing
    tables/data are deleted automatically when the block exits (even on exception).

    Attributes:
        db_connection (MySQLConnectionAbstract): Active MySQL database connection.
        embedder (Embeddings): Embeddings generator for computing vector representations.
        schema_name (str): SQL schema for table storage.
        table_name (Optional[str]): Name of the active table backing the store
            (or None until created).
        embedding_dimension (int): Size of embedding vectors stored.
        next_id (int): Internal counter for unique document ID generation.
    _db_connection	_embedder_schema_name_table_name_embedding_dimension_next_idNdb_connectionembedderreturnc                     t         |           d| _        t        |      | _        |xs t        |      | _        || _        d| _        t        | j                  j                  t                    | _        y)a  
        Initialize a MyVectorStore with a database connection and embedding generator.

        Args:
            db_connection: MySQL database connection for all vector operations.
            embedder: Embeddings generator used for creating and querying embeddings.

        Raises:
            ValueError: If the schema name is not valid
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        r   N)super__init__r"   r   r   r   r   r   r    lenembed_queryBASIC_EMBEDDING_QUERYr!   )selfr#   r$   	__class__s      @D:\jyotish\venv\Lib\site-packages\mysql/ai/genai/vector_store.pyr(   zMyVectorStore.__init__d   sg    $ 	)-8!@\-%@+*. %(NN&&'<=%
!    num_idsc                     t        | j                  | j                  |z         D cg c]  }d| 	 }}| xj                  |z  c_        |S c c}w )z
        Generate a batch of unique internal document IDs for vector storage.

        Args:
            num_ids: Number of IDs to create.

        Returns:
            List of sequentially numbered internal string IDs.
        r   )ranger"   )r,   r0   iidss       r.   _get_idszMyVectorStore._get_ids   sW     ,1PW@W+X
+XaoaS!+X 	 
 	 
	
s   Ac                       j                   at         j                        5 t         fd      }d j                   d| d}t        | j                  f       | _         ddd       yy# 1 sw Y   yxY w)a  
        Create a backing SQL table for storing vectors if not already created.

        Returns:
            None

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            The table name is randomized to avoid collisions.
            Schema includes content, metadata, and embedding vector.
        Nc                 4    t        j                  |        S N)r   r   )
table_namecursorr,   s    r.   <lambda>z2MyVectorStore._make_vector_store.<locals>.<lambda>   s    < 1 1:, (r/   z
                CREATE TABLE .a	   (
                    `id` VARCHAR(128) NOT NULL,
                    `content` TEXT,
                    `metadata` JSON DEFAULT NULL,
                    `embed` vector(%s),
                    PRIMARY KEY (`id`)
                ) ENGINE=InnoDB;
                params)r    r   r   r   r   r   r!   )r,   r9   create_table_stmtr:   s   `  @r.   _make_vector_storez MyVectorStore._make_vector_store   s      ##D$7$78F,
)"//0* >%! -t7P7P6R $. ) 98 $88s   AA11A:r4   _c           
          t        | j                        5 }|r1|D ],  }t        |d| j                   d| j                   d|f       . t        || j                  | j                        r| j                          ddd       y# 1 sw Y   yxY w)a#  
        Delete documents by ID. Optionally deletes the vector table if empty after deletions.

        Args:
            ids: Optional sequence of document IDs to delete. If None, no action is taken.

        Returns:
            None

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            If the backing table is empty after deletions, the table is dropped and
            table_name is set to None.
        zDELETE FROM r<    WHERE id = %sr=   N)r   r   r   r   r    r   
delete_all)r,   r4   rA   r:   _ids        r.   deletezMyVectorStore.delete   s    &   3 34C&t'8'8&94;K;K:LN[ #v  fd&7&79I9IJ! 544s   A%BBc                     | j                   Gt        | j                        5 }t        || j                  | j                          d| _         ddd       yy# 1 sw Y   yxY w)zc
        Delete and drop the entire vector store table.

        Returns:
            None
        N)r    r   r   r   r   )r,   r:   s     r.   rD   zMyVectorStore.delete_all   sT     '#D$7$78F ):):D<L<LM#'  98 (88s   )AAtexts	metadatasc                     t        |      }t        ||xs i gt        |      z        D cg c]  \  }}t        ||       }}}| j	                  ||      S c c}}w )ap  
        Add a batch of text strings and corresponding metadata to the vector store.

        Args:
            texts: List of strings to embed and store.
            metadatas: Optional list of metadata dicts (one per text).
            ids: Optional custom document IDs.

        Returns:
            List of document IDs corresponding to the added texts.

        Raises:
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            If metadatas is None, an empty dict is assigned to each document.
        )page_contentmetadata)r4   )listzipr)   r	   add_documents)r,   rH   rI   r4   rA   textmeta	documentss           r.   	add_textszMyVectorStore.add_texts   sn    4 U "%)GrdSZ6GH
H
d $6H 	 
 !!)!55	
s   Ac                 p    |t        d      t        |      } | ||      }|j                  ||       |S )a  
        Construct and populate a MyVectorStore instance from raw texts and metadata.

        Args:
            texts: List of strings to vectorize and store.
            embedder: Embeddings generator to use.
            metadatas: Optional list of metadata dicts per text.
            db_connection: Active MySQL connection.

        Returns:
            Instance of MyVectorStore containing the added texts.

        Raises:
            ValueError: If db_connection is not provided.
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.
        z@db_connection must be specified to create a MyVectorStore object)r#   r$   )rI   )
ValueErrorrM   rS   )clsrH   r$   rI   r#   instances         r.   
from_textszMyVectorStore.from_texts  sI    4  R  U]XF5I6r/   rR   c           	         |r=t        |      t        |      k7  r&dt        |       dt        |       d}t        |      t        |      dkD  r| j                          ng S || j                  t        |            }|D cg c]  }|j                   }}| j
                  j                  |      }t        j                         }||d<   ||d<   ||d<   |D cg c]  }|j                   c}|d	<   t        | j                        5 }t        || j                  | j                  |dd
i       ddd       |S c c}w c c}w # 1 sw Y   |S xY w)a  
        Embed and store Document objects as high-dimensional vectors with metadata.

        Args:
            documents: List of Document objects (each with 'page_content' and 'metadata').
            ids: Optional list of explicit document IDs. Must match the length of documents.

        Returns:
            List of document IDs stored.

        Raises:
            ValueError: If provided IDs do not match the number of documents.
            DatabaseError:
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Notes:
            Automatically creates the backing table if it does not exist.
        z.ids must be the same length as documents. Got z	 ids and z documents.r   NidcontentembedrL   zstring_to_vector(%s))col_name_to_placeholder_string)r)   rU   r@   r5   rK   r   embed_documentspd	DataFramerL   r   r   r   r   r    )	r,   rR   r4   msgdocr[   vectorsdfr:   s	            r.   rO   zMyVectorStore.add_documents.  sF   , 3s8s9~-3xj	#i.)9F  S/!y>A##%I;--I/C/89y3##y9..009\\^4972;<)3#,,)<: 3 34!!  079O/P 5 
% : =4 
s   D7D<'EEquerykkwargsc                    | j                   g S | j                  j                  |      }t        | j                        5 }t        |dt         dt        |      g       |j                  dd      }|j                  dd      |j                  d	d
      |j                  dd      d}t        |      \  }}	dt         d| j                   d| j                    d| d| dt         dt         dt         d}
t        ||
|g|	       t        |dt                g }t        j                  |j!                         d         }|D ]~  }t        |d| j                   d| j                    d|d   f       |j!                         \  }}}||d}|t        j                  |      |d<   t#        di |}|j%                  |        |cddd       S # 1 sw Y   yxY w)a  
        Search for and return the most similar documents in the store to the given query.

        Args:
            query: String query to embed and use for similarity search.
            k: Number of top documents to return.
            kwargs: options to pass to ML_SIMILARITY_SEARCH. Currently supports
                distance_metric, max_distance, percentage_distance, and segment_overlap

        Returns:
            List of Document objects, ordered from most to least similar.

        Raises:
            DatabaseError:
                If provided kwargs are invalid or unsupported.
                If a database connection issue occurs.
                If an operational error occurs during execution.

        Implementation Notes:
            - Calls ML similarity search within MySQL using stored procedures.
            - Retrieves IDs, content, and metadata for search matches.
            - Parsing and retrieval for context results are handled via intermediate JSONs.
        NzSET @z = string_to_vector(%s)r=   distance_metricCOSINEmax_distanceg333333?percentage_distanceg      4@segment_overlapr   )rk   rl   rm   z=
            CALL sys.ML_SIMILARITY_SEARCH(
                @z3,
                JSON_ARRAY(
                    'r<   z'
                ),
                JSON_OBJECT(
                    "segment", "content",
                    "segment_embedding", "embed",
                    "document_name", "id"
                ),
                zR,
                %s,
                NULL,
                NULL,
                z,
                @z
            )
            zSELECT @z"SELECT id, content, metadata FROM rC   document_name)rZ   rK   rL    )r    r   r*   r   r   r   VAR_EMBEDDINGstrgetr   r   VAR_CONTEXTVAR_CONTEXT_MAPVAR_RETRIEVAL_INFOjsonloadsfetchoner	   append)r,   re   rf   rg   	embeddingr:   ri   retrieval_optionsretrieval_options_placeholderretrieval_options_paramssimilarity_search_queryresultscontext_mapscontextdoc_idr[   rL   doc_argsrb   s                      r.   similarity_searchzMyVectorStore.similarity_searchg  s-   : #INN..u5	 3 34&=>I' %jj):HEO &

>3 ?'-zz2G'N#)::.?#C! GW!GC)+C+ !''($*:*:); <   // 0 !" #$% &%'#, ''C*BC
 (?*; <=G::foo&7&:;L'  $ 1 12!D4D4D3E F(( $O46 -3OO,=) !$+ '+/::h+?HZ(**s#) (, Q 544s   FGGc                     | S )a  
        Enter the runtime context related to this vector store instance.

        Returns:
            The current MyVectorStore object, allowing use within a `with` statement block.

        Usage Notes:
            - Intended for use in a `with` statement to ensure automatic
              cleanup of resources.
            - No special initialization occurs during context entry, but enables
              proper context-managed lifecycle.

        Example:
            with MyVectorStore(db_connection, embedder) as vectorstore:
                vectorstore.add_texts([...])
                # Vector store is active within this block.
            # All storage and resources are now cleaned up.
        ro   )r,   s    r.   	__enter__zMyVectorStore.__enter__  s	    & r/   exc_typeexc_valexc_tbc                 $    | j                          y)a  
        Exit the runtime context for the vector store, ensuring all storage
        resources are cleaned up.

        Args:
            exc_type: The exception type, if any exception occurred in the context block.
            exc_val: The exception value, if any exception occurred in the context block.
            exc_tb:  The traceback object, if any exception occurred in the context block.

        Returns:
            None: Indicates that exceptions are never suppressed; they will propagate as normal.

        Implementation Notes:
            - Automatically deletes all vector store data and backing tables via `delete_all()`
            upon exiting the context.
            - This cleanup occurs whether the block exits normally or due to an exception.
            - Does not suppress exceptions; errors in the context block will continue to propagate.
            - Use when the vector store lifecycle is intended to be temporary or scoped.

        Example:
            with MyVectorStore(db_connection, embedder) as vectorstore:
                vectorstore.add_texts([...])
                # Vector store is active within this block.
            # All storage and resources are now cleaned up.
        N)rD   )r,   r   r   r   s       r.   __exit__zMyVectorStore.__exit__  s    > 	r/   r8   )r%   N)NN)   )r%   r   )*__name__
__module____qualname____doc__r   r   r   __annotations__r   r
   r   rq   r    r   r!   intr"   r(   rM   r5   r@   r   r   rF   rD   r   dictr   rS   classmethodr   rX   r	   rO   r   r   r   typeBaseExceptionobjectr   __classcell__)r-   s   @r.   r   r   I   s0   & /:mN+;'MIz)#L#%!,K#. +#-MHc!
 *.
.
 :&
 
	
@ S	  %.N"(8C=1 "s "t ">
( +/#'	 6} 6 DJ' 6 d3i 	 6
  6 
c 6D 
 +/15#}# # DJ'	#
 /# 
# #L ;?7h7.23i7	c7x jj j 	j
 
hjX*d
# }d*+ fdl#	
 
r/   r   )+r   rv   typingr   r   r   r   r   r   pandasr_   langchain_core.documentsr	   langchain_core.embeddingsr
   langchain_core.vectorstoresr   pydanticr   mysql.ai.genai.embeddingr   mysql.ai.utilsr   r   r   r   r   r   r   r   r   r   mysql.connector.abstractsr   r+   EMBEDDING_SOURCErp   rs   rt   ru   VAR_OPTIONSID_SPACEr   ro   r/   r.   <module>r      s   :  A A  - 0 3   1   >& $ !"*- )#$L1&'7  )~K ~r/   