
    8yi                     r    d dl Z d dlZd dlZd dlmZmZmZmZ d dlm	Z	  e	e
          Z G d d          ZdS )    N)ListDictOptionalSet)setup_loggerc                       e Zd ZdZdZdZdefdZd ZdefdZ	dd
e
dee         fdZdd
e
dee         fdZddede
dee         fdZdde
de
defdZdee         dee         fdZdS )ObsidianIndexerzS
    Scans an Obsidian Vault to index files and extract samples based on tags.
    z#writing_samplez#reply_sample
vault_pathc                 6   |rt           j                            |          nd | _        | j        r$t           j                            | j                  s"t
                              d| j                    g | _        g | _        i | _	        i | _
        d| _        d S )NzObsidian Vault path invalid: r   )ospath
expanduserr
   existsloggerwarningwriting_samplesreply_samples
file_indexfile_metadatafiles_scanned)selfr
   s     3/root/projects/butler/slack_bot/obsidian/indexer.py__init__zObsidianIndexer.__init__   s    <FP"',,Z888D 	NbgnnT_&E&E 	NNNL4?LLMMM*,(**,/1    c                 T   | j         r$t          j                            | j                   s$t                              d| j                     dS t                              d| j                     g | _        g | _        i | _	        i | _
        d| _        t          j        | j                   D ]v\  }}}d |D             |dd<   |D ]\}|                    d          rEt          j                            ||          }|                     |           | xj        dz  c_        ]wt                              d| j         d	           t                              d
t!          | j                   d           t                              d
t!          | j                   d           dS )z9Scans the vault for markdown files and populates indexes.z Cannot scan: Invalid vault path NzScanning Obsidian Vault at: r   c                 <    g | ]}|                     d           |S ).)
startswith).0ds     r   
<listcomp>z.ObsidianIndexer.scan_vault.<locals>.<listcomp>+   s)    @@@Qall3.?.?@q@@@r   z.md   zScan complete. Scanned z files.zFound z writing samples.z reply samples.)r
   r   r   r   r   errorinfor   r   r   r   r   walkendswithjoin_process_filelen)r   rootdirsfilesfile	full_paths         r   
scan_vaultzObsidianIndexer.scan_vault   s    	bgnnT_&E&E 	LLMDOMMNNNFD4?DDEEE!!#!9!9 	, 	,D$@@$@@@DG , ,=='' , "T4 8 8I&&y111&&!+&&	, 	Id.@IIIJJJIS!566IIIJJJES!344EEEFFFFFr   	file_pathc                    	 t           j                            |          }|| j        |<   t	          |dd          5 }|                                }ddd           n# 1 swxY w Y   | j        |v r| j                            |           | j	        |v r| j
                            |           |dd                                         | j        |<   dS # t          $ r+}t                              d| d|            Y d}~dS d}~ww xY w)z!Reads a file and checks for tags.rutf-8encodingNi zFailed to read file z: )r   r   getmtimer   openreadWRITING_SAMPLE_TAGr   appendREPLY_SAMPLE_TAGr   lowerr   	Exceptionr   r   )r   r0   mtimefcontentes         r   r(   zObsidianIndexer._process_file7   sk   	DG$$Y//E,1Dy)iw777 #1&&((# # # # # # # # # # # # # # # &'11$++I666$//")))444
 *1&)9)?)?)A)ADOI&&& 	D 	D 	DNNB)BBqBBCCCCCCCCC	Ds;   ;C AC A""C %A"&A-C 
D
 DD
   countreturnc                     | j         sg S t          j        | j         t          t	          | j                   |                    }|                     |          S )z*Returns content of random writing samples.)r   randomsampleminr)   _read_filesr   rC   selected_pathss      r   get_writing_samplesz#ObsidianIndexer.get_writing_samplesS   sP    # 	It';STEYAZAZ\a=b=bcc///r   c                     | j         sg S t          j        | j         t          t	          | j                   |                    }|                     |          S )z(Returns content of random reply samples.)r   rF   rG   rH   r)   rI   rJ   s      r   get_reply_samplesz!ObsidianIndexer.get_reply_samples[   sP    ! 	It'93s4CU?V?VX];^;^__///r      querylimitc                    |                                 }t          j        d|          }|sg S t                              d|            g }| j                                        D ]\  }}d}d}	t          j        	                    |                                           }
|
 d|
 d| }|D ]8}||v r2|
                    |          }||t          |          dz  z  z  }|	dz  }	9|	dk    r||	dz  z  }|                    ||f           |                    d d	
           d |d|         D             }|                     |          S )z
        Simple keyword search. Returns content of matching files.
        RAG Strategy: Find notes containing the query keywords.
        z[a-zA-Z0-9_]+|[\u4e00-\u9fff]zSearch tokens: r    rB   r"      c                     | d         S Nr    xs    r   <lambda>z(ObsidianIndexer.search.<locals>.<lambda>   
    ! r   Tkeyreversec                     g | ]\  }}|S rW   rW   )r   _r   s      r   r!   z*ObsidianIndexer.search.<locals>.<listcomp>   s    >>>gaT>>>r   N)r<   refindallr   debugr   itemsr   r   basenamerC   r)   r:   sortrI   )r   rP   rQ   query_lowertokensscored_filesr   r@   scoreunique_matchesfilenamesearch_texttokenrC   	top_pathss                  r   searchzObsidianIndexer.searchc   s   
 kkmm <kJJ 	I/v//000 !_2244 	3 	3MD'EN w''--3355H%<<<<7<<K ( (K'' (--e44EUc%jjAo66E"a'N !!1!45##UDM222 	nnd;;;>>fuf)=>>>		***r   
   daysc                 "   ddl }|                                 }||dz  z
  }g }| j                                        D ]"\  }}||k    r|                    ||f           #|                    d d           |d|         }|sd| dS d	| d
g}	|D ]i\  }}t
          j                            |          }
|                    d|	                    |                    }|	                    d|
 d| d           jd
                    |	          S )zP
        Returns a formatted list of files modified in the last N days.
        r   NiQ c                     | d         S rV   rW   rX   s    r   rZ   z2ObsidianIndexer.get_recent_files.<locals>.<lambda>   r[   r   Tr\   zNo files modified in the last z days.u   📂 **Updated in last z days:**z%Y-%m-%d %H:%Mz- `z` ()
)timer   rd   r:   rf   r   r   re   strftime	localtimer'   )r   rr   rQ   rw   current_timecutoff_timerecent_filesr   r>   outputrl   date_strs               r   get_recent_filesz ObsidianIndexer.get_recent_files   sP    	yy{{"dUl3-3355 	3 	3KD%####UDM222 	nnd;;; $FUF+ 	A@D@@@@:D:::;' 	: 	:KE4w''--H}}%5t~~e7L7LMMHMM888X8889999yy   r   pathsc           	      "   g }|D ]}	 t          |dd          5 }t          j                            |          }|                    d| d|                                            ddd           n# 1 swxY w Y   z# t          $ r Y w xY w|S )z1Helper to read complete content of list of paths.r2   r3   r4   z--- Source: z ---
N)r7   r   r   re   r:   r8   r=   )r   r   contentspr?   rl   s         r   rI   zObsidianIndexer._read_files   s     	 	A!S7333 Oq!w//22HOO$M8$M$M16688$M$MNNNO O O O O O O O O O O O O O O    s5   A?AA3'A?3A7	7A?:A7	;A??
BBN)rB   )rO   )rO   rq   )__name__
__module____qualname____doc__r9   r;   strr   r/   r(   intr   rL   rN   rp   r   rI   rW   r   r   r	   r	   	   sZ         +&	3 	 	 	 	G G G6Ds D D D D80 0 0T#Y 0 0 0 00 0s 049 0 0 0 0-+ -+C -+ -+DI -+ -+ -+ -+^! !S !S !# ! ! ! !<c tCy      r   r	   )r   rF   ra   typingr   r   r   r   health.utils.logging_configr   r   r   r	   rW   r   r   <module>r      s    				  				 , , , , , , , , , , , , 4 4 4 4 4 4	h		r r r r r r r r r rr   