1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Jupyter / tqdm-compatible streaming callback helpers for OxiLLaMa."""
=
"""Stream tokens into a tqdm progress bar or any tqdm-like widget.

Works in standard terminals (tqdm) and in Jupyter notebooks
(tqdm.notebook.tqdm / ipywidgets). The progress bar shows the count
of decoded tokens; the postfix shows the most recently decoded piece.

Example::

    from tqdm.auto import tqdm
    from oxillama_py.tqdm_helper import TqdmProgress

    bar = tqdm(desc="Generating", unit="tok")
    cb = TqdmProgress(bar)
    engine.generate_streaming(prompt, callback=cb)
    bar.close()
"""
"""Initialise with an already-open tqdm instance.

Args:
    pbar: An open ``tqdm`` instance (any variant: auto, notebook, …).
    flush_on_newline: If *True*, call ``pbar.refresh()`` whenever a
        newline character is encountered in the token stream so that
        Jupyter output repagination is triggered promptly.
"""
=
=
: =
"""Token-callback compatible with ``StreamingCallback`` protocol."""
"""The full decoded text seen so far."""
return
"""Clear the accumulated token list and reset the progress bar counter."""
"""Simple token collector: no progress display, it only accumulates text.

Useful when you want the full text after generation but still want a
callback-compatible object.

Example::

    col = CollectTokens()
    engine.generate_streaming(prompt, callback=col)
    print(col.text)
"""
: =
return