forge-guardrails 0.1.0

Foundation types for an LLM-agent workflow framework
Documentation
+

Dj�#���Rt^RIHt^RIt^RIt^RIt^RIt^RIHt^RI	H
t
^RIHt]
!]
4P4P^,t]R,R,R,R	,R
,t]!RR44tR
RltRRltRRltRRltRRltRRltRRltRRlt]R8Xd]!]!44hR#)zBCompare local eval JSONL against published Forge leaderboard rows.)�annotationsN)�	dataclass)�Path)�Any�forge�docs�results�rawznative-vs-prompt.mdc�^�]tRt^t$R]R&R]R&R]R&R]R&R]R&R]R	&R
]R&RtR
#)�PublishedRow�str�model�backend_mode�float�score�accuracy�completeness�int�nzdict[str, float]�	scenarios�N)�__name__�
__module__�__qualname__�__firstlineno__�__annotations__�__static_attributes__r��!scripts/compare_published_eval.pyrrs(���J����L��O���
�F��rrc� �V^8�dQhRRRR/#)�r	r�returnrr)�formats"r�__annotate__r#s��*�*�s�*�u�*rc�D�\VPR44R,#)�%�Y@)r�rstrip)r	s&r�
parse_percentr(s������C��!�E�)�)rc� �V^8�dQhRRRR/#)r �textrr!zdict[str, str]r)r"s"rr#r#"s��;�;�s�;�~�;rc��VP4F,pRV9dK\\P!RV44u#	\	R4h)zrel=relevance_detectionz([A-Za-z0-9_]+)=([A-Za-z0-9_]+)z"published results legend not found)�
splitlines�dict�re�findall�
SystemExit)r*�lines& r�parse_legendr2"sC�����!��$�D�0���B�J�J�A�4�H�I�I�"��9�
:�:rc�(�V^8�dQhRRRRRRRR/#)r �pathrr
rrr!rr)r"s"rr#r#*s.��,W�,W�d�,W�3�,W�c�,W�l�,Wrc���VP4p\V4pRp\P!R4pVP	4EFtpVPR4'd.VP
4pW�PR4^,RpKHVPV4p	V	'gKcV	P4wr�pW�8wgW�8wdK�Vf\R4hVP
4p\V4^\V4,8d\RV24hV^^\V4,p
\W]4UUu/uF&wr�W�9gK
WN,\V4R,bK(	ppp\V
V\!V^,4\!V^,4\!V^,4\#V^,4VR7u#	\RVR	VR
24h \dRpELUi;iuuppi)Nz)^(.+?)\s+(LS/[NP])\s+\[reforged\]\s+(.+)$z
Model/Backend�Nz'published row found before table headerz$published row has unexpected shape: r&)r
rrrrrrzpublished row not found for � � [reforged])�	read_textr2r.�compiler,�
startswith�split�index�
ValueError�match�groupsr0�len�ziprrr(r)r4r
rr*�legend�header_abbrevs�row_rer1�partsr?�	row_model�row_backend_mode�metrics�scenario_values�abbr�valuers&&&              r�parse_published_rowrM*s����>�>��D�
�$�
�F�'+�N�
�Z�Z�D�
E�F����!���?�?�?�+�+��J�J�L�E�
&�!&�{�{�3�'7�!�';�'=�!>��
����T�"����/4�|�|�~�,�	�W���!1�!A���!��F�G�G��
�
����u�:��C��/�/�/��C�D�6�J�K�K���A��N�(;�$;�<�� #�>�C�
�C����~�
/�F�L�%��,��.�.�C�	�
�
��)���a��)�"�5��8�,�&�u�Q�x�0��%��(�m��
�	
�9"�L�3�E�7�!�L�>��U�
V�V��C�
&�!%��
&��$
s�0G�8
G#�G#�
G �G c� �V^8�dQhRRRR/#)r r4rr!�list[dict[str, Any]]r)r"s"rr#r#Ys����T��2�rc��.pVP4;_uu_4p\V^4FEwr4VP4pV'gKVP\P
!V44KG	RRR4V# \Pdp\TRTRT24ThRp?ii;i +'giT#;i)��:z: invalid JSON: N)�open�	enumerate�strip�append�json�loads�JSONDecodeErrorr0)r4�rows�handle�linenor1�stripped�excs&      r�
load_jsonlr_Ys���!#�D�	
������%�f�a�0�L�F��z�z�|�H���
S����D�J�J�x�0�1�1�
��K���'�'�
S� �D�6��6�(�2B�3�%�!H�I�s�R��
S��
���K�s/�,B,�%A;�-B,�;B)	�B$	�$B)	�)B,�,B=	c�(�V^8�dQhRRRRRRRR/#)r rZrOr�set[str]�local_modelz
str | Noner!r)r"s"rr#r#gs0����
�������	rc���.pVFapVPR4V9dKVeVPR4V8wdK7VPRR4R8wdKPVPV4Kc	V#)�scenarior
�ablation�reforged)�getrV)rZrrb�selected�rows&&&  r�select_local_rowsrjgsi��
�H����7�7�:��i�/���"�s�w�w�w�'7�;�'F���7�7�:�z�*�j�8���������Orc�$�V^8�dQhRRRRRR/#)r rhrOrrar!z@tuple[float, float, dict[str, float], dict[str, int], list[str]]r)r"s"rr#r#xs,��O�O�"�O��O�F�Orc	�|�VUu/uFq".bK	ppVF!pW4R,,PV4K#	\RVP444p\V4pV^8XdRR//V3#\	RV44p\	RV44pVP4UU	u/uF2wr)V	'gKV\	RV	44\V	4,bK4	p
pp	VP4UU	u/uFwr)V	'gKV\V	4bK	ppp	Wv,W�,W�V3#uupiuup	piuup	pi)rdc3�>"�TFwrV'dKVx�K	R#5i)Nr)�.0rd�valuess&  r�	<genexpr>� local_metrics.<locals>.<genexpr>�s���X�6I�"2�(�QW�X�X�6I�s��
gc3�j"�TF)p\VPR44'gK%^x�K+	R#5i��successN��boolrg�rnris& rrprq�s"���F��#�T�#�'�'�)�2D�-E�A�A����"3�
3c3�j"�TF)p\VPR44'gK%^x�K+	R#5i)rNrurws& rrprq�s"���K��#�T�#�'�'�.�2I�-J�A�A��rxc3�j"�TF)p\VPR44'gK%^x�K+	R#5irsrurws& rrprq�s"���F�6�C�T�#�'�'�)�2D�-E�a�a�6�rx)rV�sorted�itemsrA�sum)rhrrd�by_scenariori�missing�total�	successes�	completedro�per_scenario�countss&&          r�
local_metricsr�xs@��R[�3[�QZ�X�b�L�QZ�K�3[����
�O�$�+�+�C�0���X�k�6G�6G�6I�X�X�G���M�E���z��C��R��(�(��F��F�F�I��K��K�K�I�!,� 1� 1� 3�� 3��H��	U��#�F�6�F�F��V��T�T� 3���
=H�<M�<M�<O�
Z�<O�(8��SY�#�h��F��#�<O�F�
Z���i�/��w�N�N��#4\����
[s�D-�)D2�:'D2�7D8�D8c� �V^8�dQhRRRR/#)r rLrr!rr)r"s"rr#r#�s��!�!�e�!��!rc��V^d,R
R2#)�d�.1fr%r)rLs&r�ppr��s���c�k�#�
�a� � rc��V^8�dQhRR/#)r r!rr)r"s"rr#r#�s��j
�j
�c�j
rc���\P!RR7pVPR\R7VPR\\R7VPRRRR	7VPR
RR.RR
R7VPRRR7VPR\
RR7VPR\
RR7VPR\
RR7VPRRR7VPRRRR7VP
4p\VPVPVP4p\VP4p\VP4p\W4VP 4pVUu.uF3pVP#R4R8XgVP#R 4R!8XgK1VNK5	ppV'd�VPR8Xd�VP$'g�\'VUu0uF*pVP#R"R#4R$VP#RR#42kK,	up4p\)R%VPR&VPR'24\)R(\+V424\)R)R*P-V424\)R+4^#\/VV4wr�r�p
.p.pV
'd(VP1R,R*P-V
4,4VP2VP4R-,,
pVP6VP8R-,,
pV	V8dCVP1R.\;V	4R/\;VP24R0VP4R1
R224V
V8dCVP1R3\;V
4R/\;VP64R0VP8R1
R224VP<R-,p\'VPP?44F�wppVV9dKVV,pVVV,
8gK(VR4\;V4R/\;V4R0VP<R1
R22pVP@'dVP1V4KxVP1V4K�	\)R%VPR&VPR'24\)R5VPB24\)R(\EVPG4424\)R6\I\'VP?44424\)R7\;V	4R8\;VP2424\)R9\;V
4R8\;VP6424V'd#\)R:4VFp\)R;V24K	V'dc\JPLPO4\)R<\JPPR=7VF!p\)R;V2\JPPR=7K#	^#\)R>4^#uupiuupi)?z8Compare local eval JSONL against published Forge results)�description�jsonl)�typez--published)r��defaultz--modelTzPublished model identity)�required�helpz--backend-modezLS/NzLS/Pz&Published leaderboard backend/mode row)�choicesr�r�z
--local-modelz!Local JSONL model identity filter)r�z--score-tolerance-ppg.@z--completeness-tolerance-ppg@z--scenario-tolerance-ppg>@z--strict-scenarios�
store_true)�actionz--force-proxy-comparezICompare proxy rows to direct published rows despite backend/mode mismatch)r�r��mode�proxy�eval_target_backendzopenai-proxy�backend�unknown�/zPublished baseline: r7r8zLocal rows:         zLocal modes:        z, z�
Published comparison skipped: local rows are proxy-mode rows, not direct LS/N rows. Compare against LS/P or pass --force-proxy-compare to compare anyway.zmissing scenarios: r&zscore z below published z minus r�r�z
completeness z: zPublished N:        zLocal scenario N:   zScore:              local z vs published zCompleteness:       local z

Warnings:z  - z

Failures:)�filez
Published comparison passed.))�argparse�ArgumentParser�add_argumentr�DEFAULT_PUBLISHEDr�
parse_argsrM�	publishedr
rr_r��setrrjrbrg�force_proxy_comparer{�printrA�joinr�rVr�score_tolerance_ppr�completeness_tolerance_ppr��scenario_tolerance_ppr|�strict_scenariosrr}ror-�sys�stdout�flush�stderr)�parser�argsr�rZ�published_scenariosrhri�
proxy_rows�proxy_modes�local_score�	local_cmp�local_scenariosr�r�failures�warnings�score_floor�	cmp_floor�scenario_tolrd�published_score�local�message�warning�failures                         r�mainr��s~��
�
$�
$�N��F�����d��+�
���
�D�:K��L�
���	�D�7Q��R�
������ ��
5�	������.Q��R�
���.�U�D��I�
���5�E�3��O�
���1��t��L�
���,�\��B�
�����
X���
����D�#�D�N�N�D�J�J��@Q�@Q�R�I��d�j�j�!�D��i�1�1�2�� ��D�<L�<L�M�H����C��7�7�6�?�g�%����1F�)G�>�)Y�	�����
�i�,�,��6�t�?W�?W�?W��!�
�!���w�w�y�)�,�-�Q�s�w�w�v�y�/I�.J�K�!�
���	�$�Y�_�_�$5�Q�y�7M�7M�6N�k�Z�[�
�$�S��]�O�4�5�
�$�T�Y�Y�{�%;�$<�=�>�
�
7�	
�
�?L���@�<�K�O�W�
�H��H�����-��	�	�'�0B�B�C��/�/�D�$;�$;�e�$C�C�K��&�&��)G�)G�%�)O�O�I��[� �����R��_�%�%6�r�)�/�/�7J�6K�L��,�,�S�1��
5�	
��9������B�y�M�?�*;�B�y�?U�?U�<V�;W�X��3�3�C�8��
<�	
�
�-�-��5�L�%+�I�,?�,?�,E�,E�,G�%H�!��/��?�*����)���?�\�1�1��*�B�r�%�y�k�):�2�o�;N�:O�P��3�3�C�8��<�
��$�$�$�����(�����(�&I�
� ���� 1��9�3I�3I�2J�+�
V�W�	� ����
�
.�/�	� ��V�]�]�_�!5� 6�
7�8�	� ��f�V�\�\�^�&<�!=� >�
?�@�	�&�r�+��&7�~�b����FY�EZ�
[�\�	�
$�R�	�]�O�>�"�Y�E[�E[�B\�A]�^���
�m���G��D��	�"�#� ���
�
����
�m�#�*�*�-��G��D��	�"����4� ��	�
*�+���[��
s�7.W0�*W0�&0W5�__main__)�__doc__�
__future__rr�rWr.r��dataclassesr�pathlibr�typingr�__file__�resolve�parents�ROOTr�rr(r2rMr_rjr�r�r�rr0rrr�<module>r�s���H�"���	�
�!����H�~����'�'��*���7�N�V�+�i�7�%�?�BW�W��� � �� �*�;�,W�^��"O�0!�j
�Z�z��
�T�V�
��r