o
    hB                     @  sZ  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ ddlmZm Z  ddl!m"Z"m#Z#m$Z$ dZ%dZ&dZ'ddiZ(e	G dd dZ)G dd dZ*G dd dZ+d-dd Z,d.d'd(Z-G d)d* d*e"Z.G d+d, d,e#Z/dS )/    )annotationsN)AsyncIterator)	dataclass)Anycast)AsyncOpenAI   )_debug)AgentsException)logger)Span	SpanErrorTranscriptionSpanDatatranscription_span   )STTWebsocketConnectionError)npnpt
websockets)
AudioInputStreamedAudioInput)StreamedTranscriptionSessionSTTModelSTTModelSettingsi  
   typesemantic_vadc                   @  s   e Zd ZU ded< dS )ErrorSentinel	ExceptionerrorN)__name__
__module____qualname____annotations__ r$   r$   _/var/www/html/openai_agents/venv/lib/python3.10/site-packages/agents/voice/models/openai_stt.pyr      s   
 r   c                   @     e Zd ZdS )SessionCompleteSentinelNr    r!   r"   r$   r$   r$   r%   r'   "       r'   c                   @  r&   )WebsocketDoneSentinelNr(   r$   r$   r$   r%   r*   &   r)   r*   
audio_data(list[npt.NDArray[np.int16 | np.float32]]returnstrc                 C  sL   t | }|jt jkrt |dd}|d t j}| }t	|
dS )Ng      g      ?i  utf-8)r   concatenatedtypefloat32clipastypeint16tobytesbase64	b64encodedecode)r+   concatenated_audioaudio_bytesr$   r$   r%   _audio_to_base64*   s   
r<   event_queueasyncio.Queue[dict[str, Any]]expected_types	list[str]timeoutfloatc                   s   t   }	 |t   |  }|dkrtd| tj|  |dI dH }|dd}||v r1|S |dkr?td	|d q)
zj
    Wait for an event from event_queue whose type is in expected_types within the specified timeout.
    Tr   zTimeout waiting for event(s): rA   Nr    r   Error event: )timeTimeoutErrorasynciowait_forgetr   )r=   r?   rA   
start_time	remainingevtevt_typer$   r$   r%   _wait_for_event4   s   rO   c                   @  s   e Zd ZdZd0ddZd1ddZd2ddZd1ddZd1ddZd3ddZ	d1ddZ
d4d"d#Zd1d$d%Zd1d&d'Zd1d(d)Zd5d+d,Zd1d-d.Zd/S )6OpenAISTTTranscriptionSessionz/A transcription session for OpenAI's STT model.inputr   clientr   modelr.   settingsr   trace_include_sensitive_databool"trace_include_sensitive_audio_datac                 C  s   d| _ || _|| _|| _|jpt| _|| _|| _|j	| _
t | _d | _t | _t | _g | _d | _d | _d | _d | _d | _d | _d S )NF)	connected_client_model	_settingsturn_detectionDEFAULT_TURN_DETECTION_turn_detection_trace_include_sensitive_data#_trace_include_sensitive_audio_dataqueue_input_queuerH   Queue_output_queue
_websocket_event_queue_state_queue_turn_audio_buffer_tracing_span_listener_task_process_events_task_stream_audio_task_connection_task_stored_exception)selfrQ   rR   rS   rT   rU   rW   r$   r$   r%   __init__J   s(   	


z&OpenAISTTTranscriptionSession.__init__r-   Nonec                 C  s6   t | j| jj| jj| jj| jdd| _| j  d S )N)temperaturelanguagepromptr\   )rS   model_config)	r   rZ   r[   rr   rs   rt   r^   ri   startro   r$   r$   r%   _start_turnl   s   	z)OpenAISTTTranscriptionSession._start_turn_transcriptc                 C  sd   t |dk rd S | jr0| jrt| j| jj_d| jj_| jr#|| jj_	| j
  g | _d | _d S d S )N   pcm)lenri   r`   r<   rh   	span_datarQ   input_formatr_   outputfinish)ro   ry   r$   r$   r%   	_end_turnx   s   



z'OpenAISTTTranscriptionSession._end_turnc                   s   | j d us
J d| j 2 zS3 d H W }z1t|}|ddkr*td|d |ddv r:| j|I d H  | j|I d H  W q ty` } z| j	t
|I d H  td|d }~ww 6 | jt I d H  d S )NWebsocket not initializedr   r   rE   )session.updatedtranscription_session.updatedsession.createdtranscription_session.createdzError parsing events)re   jsonloadsrJ   r   rg   putrf   r   rd   r   r*   )ro   messageeventer$   r$   r%   _event_listener   s"   

z-OpenAISTTTranscriptionSession._event_listenerc                   sV   | j d us
J d| j tddddddd| ji| jd	id
dI d H  d S )Nr   zsession.updatetranscriptionrQ   z	audio/pcmi]  )r   raterS   )formatr   r\   r   audio)r   session)re   sendr   dumpsrZ   r^   rw   r$   r$   r%   _configure_session   s    z0OpenAISTTTranscriptionSession._configure_sessionwswebsockets.ClientConnectionc              
     s`  || _ t|  | _zt| jddgtI d H }W n6 ty8 } zt	d}| j
t|I d H  ||d }~w tyP } z| j
t|I d H  |d }~ww |  I d H  z"t| jddgtI d H }tjrptd W d S td|  W d S  ty } zt	d}| j
t|I d H  ||d }~w ty } z| j
t|I d H   d }~ww )	Nr   r   z7Timeout waiting for transcription_session.created eventr   r   zSession updatedzSession updated: z7Timeout waiting for transcription_session.updated event)re   rH   create_taskr   rj   rO   rg   SESSION_CREATION_TIMEOUTrG   r   rd   r   r   r   r   SESSION_UPDATE_TIMEOUTr	   DONT_LOG_MODEL_DATAr   debug)ro   r   r   r   wrapped_errr$   r$   r%   _setup_connection   sV   
z/OpenAISTTTranscriptionSession._setup_connectionc              
     s   	 zIt j| j tdI d H }t|trW nX|dd}|dv rBtt|dd}t	|dkrB| 
| |   | j|I d H  t dI d H  W n" t jyU   Y n tym } z| jt|I d H  |d }~ww q| jt I d H  d S )	NTrC   r   unknown)#input_audio_transcription_completedz5conversation.item.input_audio_transcription.completed
transcriptrD   r   )rH   rI   rf   rJ   EVENT_INACTIVITY_TIMEOUT
isinstancer*   r   r.   r|   r   rx   rd   r   sleeprG   r   r   r'   )ro   r   
event_typer   r   r$   r$   r%   _handle_events   s2   


z,OpenAISTTTranscriptionSession._handle_eventsaudio_queue8asyncio.Queue[npt.NDArray[np.int16 | np.float32] | None]c              
     s   | j d us
J d|   	 | I d H }|d u rd S | j| z| j tdt	|
 ddI d H  W n# tjyF   Y d S  ty^ } z| jt|I d H  |d }~ww tdI d H  q)Nr   Tzinput_audio_buffer.appendr/   r   r   )re   rx   rJ   rh   appendr   r   r   r7   r8   r6   r9   r   ConnectionClosedr   rd   r   r   rH   r   )ro   r   bufferr   r$   r$   r%   _stream_audio   s2   z+OpenAISTTTranscriptionSession._stream_audioc              
     s   z^t jdd| jj ddd4 I d H =}| |I d H  t|  | _t| 	| j
| _d| _| jr=| jI d H  n	td tdW d   I d H  W d S 1 I d H sXw   Y  W d S  tyx } z| jt|I d H  |d }~ww )Nz5wss://api.openai.com/v1/realtime?intent=transcriptionzBearer 1)AuthorizationzOpenAI-Log-Session)additional_headersTzListener task not initialized)r   connectrY   api_keyr   rH   r   r   rk   r   rb   rl   rX   rj   r   r   r
   r   rd   r   r   )ro   r   r   r$   r$   r%   _process_websocket_connection  s.   

2z;OpenAISTTTranscriptionSession._process_websocket_connectionc                 C  s   | j r| j  r| j  }|rt|tr|| _| jr.| j r.| j }|r.t|tr.|| _| jrE| j rE| j }|rEt|trE|| _| jr^| j r`| j }|rbt|trd|| _d S d S d S d S d S N)	rm   done	exceptionr   r   rn   rk   rl   rj   )ro   excr$   r$   r%   _check_errors.  s$   




z+OpenAISTTTranscriptionSession._check_errorsc                 C  st   | j r| j  s| j   | jr| j s| j  | jr'| j s'| j  | jr6| j s8| j  d S d S d S r   )rj   r   cancelrk   rl   rm   rw   r$   r$   r%   _cleanup_tasksC  s   


z,OpenAISTTTranscriptionSession._cleanup_tasksAsyncIterator[str]c                 C s   t |  | _	 z
| j I d H }W n
 t jy   Y nw |d u s-t|ts-t|t	r3| j
  n	|V  | j
  q
| jrD| d | jrO| j I d H  |   | jrY| jd S )NTrD   )rH   r   r   rm   rd   rJ   CancelledErrorr   r   r'   	task_doneri   r   re   closer   rn   )ro   turnr$   r$   r%   transcribe_turnsP  s4   


z.OpenAISTTTranscriptionSession.transcribe_turnsc                   s$   | j r| j  I d H  |   d S r   )re   r   r   rw   r$   r$   r%   r   m  s   z#OpenAISTTTranscriptionSession.closeN)rQ   r   rR   r   rS   r.   rT   r   rU   rV   rW   rV   )r-   rq   )ry   r.   r-   rq   )r   r   r-   rq   )r   r   r-   rq   )r-   r   )r    r!   r"   __doc__rp   rx   r   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   rP   G   s    

"




*




rP   c                   @  sF   e Zd ZdZdddZedd	d
ZdddZd ddZd!ddZ	dS )"OpenAISTTModelz"A speech-to-text model for OpenAI.rS   r.   openai_clientr   c                 C  s   || _ || _dS )zCreate a new OpenAI speech-to-text model.

        Args:
            model: The name of the model to use.
            openai_client: The OpenAI client to use.
        N)rS   rY   )ro   rS   r   r$   r$   r%   rp   w  s   
zOpenAISTTModel.__init__r-   c                 C  s   | j S r   )rS   rw   r$   r$   r%   
model_name  s   zOpenAISTTModel.model_namevaluer   c                 C  s   |d ur|S d S r   r$   )ro   r   r$   r$   r%   _non_null_or_not_given  s   z%OpenAISTTModel._non_null_or_not_givenrQ   r   rT   r   rU   rV   rW   c                   s   t | j|r
| ndd| |j| |j| |jddP}z1| jjj	j
| j| | |j| |j| |jdI dH }|rH|j|j_|jW W  d   S  tyo } zd|j_|tt|i d |d}~ww 1 ssw   Y  dS )zTranscribe an audio input.

        Args:
            input: The audio input to transcribe.
            settings: The settings to use for the transcription.

        Returns:
            The transcribed text.
        rD   r{   )rr   rs   rt   )rS   rQ   r~   ru   )rS   filert   rs   rr   N)r   data)r   rS   	to_base64r   rr   rs   rt   rY   r   transcriptionscreateto_audio_filetextr}   r   r   	set_errorr   r.   )ro   rQ   rT   rU   rW   spanresponser   r$   r$   r%   
transcribe  s<   


	




zOpenAISTTModel.transcriber   r   c                   s   t || j| j|||S )a  Create a new transcription session.

        Args:
            input: The audio input to transcribe.
            settings: The settings to use for the transcription.
            trace_include_sensitive_data: Whether to include sensitive data in traces.
            trace_include_sensitive_audio_data: Whether to include sensitive audio data in traces.

        Returns:
            A new transcription session.
        )rP   rY   rS   )ro   rQ   rT   rU   rW   r$   r$   r%   create_session  s   zOpenAISTTModel.create_sessionN)rS   r.   r   r   )r-   r.   )r   r   r-   r   )
rQ   r   rT   r   rU   rV   rW   rV   r-   r.   )
rQ   r   rT   r   rU   rV   rW   rV   r-   r   )
r    r!   r"   r   rp   propertyr   r   r   r   r$   r$   r$   r%   r   t  s    


*r   )r+   r,   r-   r.   )r=   r>   r?   r@   rA   rB   )0
__future__r   rH   r7   r   rF   collections.abcr   dataclassesr   typingr   r   openair   rD   r	   
exceptionsr
   r   tracingr   r   r   r   r   importsr   r   r   rQ   r   r   rS   r   r   r   r   r   r   r]   r   r'   r*   r<   rO   rP   r   r$   r$   r$   r%   <module>   s>    


  /