Skip to content

util API

maai.util

conv_2float_2_byte(val1, val2)

Convert two double-precision floats into a combined byte array.

Parameters:

Name Type Description Default
val1 float

First float.

required
val2 float

Second float.

required

Returns:

Name Type Description
bytes

Combined byte array.

Source code in src/maai/util.py
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def conv_2float_2_byte(val1, val2):
    """Pack two floats into a single 16-byte buffer.

    Each value is encoded with the ``'<d'`` struct format (little-endian
    double); the encoding of ``val1`` comes first, directly followed by the
    encoding of ``val2``.

    Args:
        val1 (float): Value stored in the first 8 bytes.
        val2 (float): Value stored in the last 8 bytes.

    Returns:
        bytes: The two 8-byte encodings, concatenated.
    """
    return b''.join(struct.pack('<d', value) for value in (val1, val2))

conv_2int16_2_byte(val1, val2)

Convert two integers into a combined byte array using 2 bytes each.

Parameters:

Name Type Description Default
val1 int

First integer.

required
val2 int

Second integer.

required

Returns:

Name Type Description
bytes

Combined byte array.

Source code in src/maai/util.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
def conv_2int16_2_byte(val1, val2):
    """Encode two integers as 2 bytes each and join the results.

    Both values are converted with ``to_bytes(2, BYTE_ORDER)``. For Python
    ``int`` objects that is the unsigned encoding, so a negative value or a
    value above 65535 raises ``OverflowError``.

    Args:
        val1 (int): Integer stored in the first 2 bytes.
        val2 (int): Integer stored in the last 2 bytes.

    Returns:
        bytes: 4 bytes, the encoding of ``val1`` followed by that of ``val2``.
    """
    return val1.to_bytes(2, BYTE_ORDER) + val2.to_bytes(2, BYTE_ORDER)

conv_byte_2_2float(b1, b2)

Convert two double-precision byte blocks back into floats.

Parameters:

Name Type Description Default
b1 bytes

First byte block.

required
b2 bytes

Second byte block.

required

Returns:

Type Description

Tuple[float, float]: The decoded floats.

Source code in src/maai/util.py
378
379
380
381
382
383
384
385
386
387
388
389
390
391
def conv_byte_2_2float(b1, b2):
    """Decode two 8-byte blocks into a pair of floats.

    Each block is read with the ``'<d'`` struct format (little-endian
    double), so each must be exactly 8 bytes long; otherwise ``struct.error``
    is raised.

    Args:
        b1 (bytes): Encoding of the first value.
        b2 (bytes): Encoding of the second value.

    Returns:
        Tuple[float, float]: The decoded values, in argument order.
    """
    (first,) = struct.unpack('<d', b1)
    (second,) = struct.unpack('<d', b2)
    return first, second

conv_bytearray_2_vapresult(barr)

Deserialize a byte array back into a VAP result dictionary.

Parameters:

Name Type Description Default
barr bytes

Serialized byte array.

required

Returns:

Type Description

Dict[str, Any]: The decoded VAP result data.

Source code in src/maai/util.py
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
def conv_bytearray_2_vapresult(barr):
    """Rebuild a VAP result dictionary from its serialized form.

    Layout read from ``barr``: an 8-byte little-endian double stored under
    ``'t'``, then five sections in the order ``x1``, ``x2``, ``p_now``,
    ``p_future``, ``vad``. Each section is a 4-byte little-endian unsigned
    element count followed by ``8 * count`` payload bytes, which are handed
    to ``conv_bytearray_2_floatarray`` for decoding.

    Args:
        barr (bytes): Serialized byte array.

    Returns:
        Dict[str, Any]: Dictionary with the keys ``'t'``, ``'x1'``, ``'x2'``,
        ``'p_now'``, ``'p_future'`` and ``'vad'``.
    """
    (t_value,) = struct.unpack('<d', barr[0:8])
    cursor = 8

    decoded = {'t': t_value}
    for field in ('x1', 'x2', 'p_now', 'p_future', 'vad'):
        # Section header: number of 8-byte elements that follow.
        (n_values,) = struct.unpack('<I', barr[cursor:cursor + 4])
        cursor += 4

        payload_size = 8 * n_values
        decoded[field] = conv_bytearray_2_floatarray(barr[cursor:cursor + payload_size])
        cursor += payload_size

    return decoded

conv_vapresult_2_bytearray(vap_result)

Serialize a VAP result dictionary into a byte array.

Parameters:

Name Type Description Default
vap_result Dict[str, Any]

VAP result data.

required

Returns:

Name Type Description
bytes

The serialized byte array.

Source code in src/maai/util.py
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
def conv_vapresult_2_bytearray(vap_result):
    """Flatten a VAP result dictionary into one byte string.

    The output starts with ``vap_result['t']`` packed as a little-endian
    double. It is followed by one section per field, in the order ``x1``,
    ``x2``, ``p_now``, ``p_future``, ``vad``: the field's length as a 4-byte
    integer in ``BYTE_ORDER``, then the output of ``conv_floatarray_2_byte``
    for that field.

    Args:
        vap_result (Dict[str, Any]): VAP result data containing the keys
            ``'t'``, ``'x1'``, ``'x2'``, ``'p_now'``, ``'p_future'``, ``'vad'``.

    Returns:
        bytes: The serialized byte array.
    """
    # NOTE(review): conv_bytearray_2_vapresult reads the length prefixes as
    # little-endian ('<I'); this only round-trips if BYTE_ORDER is 'little'.
    serialized = struct.pack('<d', vap_result['t'])

    for field in ('x1', 'x2', 'p_now', 'p_future', 'vad'):
        serialized += len(vap_result[field]).to_bytes(4, BYTE_ORDER)
        serialized += conv_floatarray_2_byte(vap_result[field])

    return serialized

download_continuous_mimi_onnx(precision='fp32', cache_dir=None, force_download=False)

Resolve paths to the streaming Mimi ONNX model and JSON sidecar on disk.

Files are fetched from maai-kyoto/continuous-mimi-onnx via hf_hub_download (cached under the usual Hugging Face cache layout, or under cache_dir when set).

Source code in src/maai/util.py
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
def download_continuous_mimi_onnx(
    precision: str = "fp32",
    cache_dir: str | None = None,
    force_download: bool = False,
) -> tuple[str, str]:
    """
    Resolve paths to the streaming Mimi ONNX model and JSON sidecar on disk.

    Files are fetched from ``maai-kyoto/continuous-mimi-onnx`` via ``hf_hub_download``
    (cached under the usual Hugging Face cache layout, or under ``cache_dir`` when set).
    """
    precision = str(precision).strip().lower()
    if precision == "fp32":
        onnx_fn = "continuous_mimi_fp32.onnx"
        meta_fn = "continuous_mimi_fp32.json"
    elif precision == "int8":
        onnx_fn = "continuous_mimi_int8.onnx"
        meta_fn = "continuous_mimi_int8.json"
    else:
        raise ValueError(f"Unsupported precision for continuous Mimi ONNX: {precision}")

    onnx_path = hf_hub_download(
        repo_id=CONTINUOUS_MIMI_ONNX_REPO_ID,
        filename=onnx_fn,
        cache_dir=cache_dir,
        force_download=force_download,
    )
    meta_path = hf_hub_download(
        repo_id=CONTINUOUS_MIMI_ONNX_REPO_ID,
        filename=meta_fn,
        cache_dir=cache_dir,
        force_download=force_download,
    )
    return str(onnx_path), str(meta_path)

euler_to_quaternion(rx, ry, rz)

XYZ intrinsic Euler angles (radians) to quaternion (qx, qy, qz, qw).

Source code in src/maai/util.py
803
804
805
806
807
808
809
810
811
812
813
814
815
def euler_to_quaternion(rx: float, ry: float, rz: float) -> tuple[float, float, float, float]:
    """Convert XYZ intrinsic Euler angles (radians) to a quaternion.

    The result is ordered ``(qx, qy, qz, qw)`` — scalar part last — and
    corresponds to the product of the per-axis rotations about X, Y and Z.
    """
    half_x, half_y, half_z = rx / 2.0, ry / 2.0, rz / 2.0
    cos_x, sin_x = math.cos(half_x), math.sin(half_x)
    cos_y, sin_y = math.cos(half_y), math.sin(half_y)
    cos_z, sin_z = math.cos(half_z), math.sin(half_z)

    return (
        sin_x * cos_y * cos_z + cos_x * sin_y * sin_z,
        cos_x * sin_y * cos_z - sin_x * cos_y * sin_z,
        cos_x * cos_y * sin_z + sin_x * sin_y * cos_z,
        cos_x * cos_y * cos_z - sin_x * sin_y * sin_z,
    )

generate_natural_nod(range_rad, count, use_pre_rise, velocity, fps=30, decay_rate=0.6, pre_rise_ratio=0.8)

Generate a natural nodding motion sequence (pitch in radians vs time in seconds).

Interpolation is cubic spline (CubicSpline) when scipy is available, else cosine interpolation between keyframes.

Parameters

range_rad : float — Nod depth in radians (absolute value).
count : int — Number of nods (>= 1).
use_pre_rise : bool — Whether to include a pre-rise before the first nod.
velocity : float — Target average angular velocity (rad/s).
fps : int — Output frame rate.
decay_rate : float — Amplitude decay per nod (0–1).
pre_rise_ratio : float — Pre-rise amplitude as a ratio of range_rad.

Returns

motion : np.ndarray — Pitch values in radians, one per frame.
time_axis : np.ndarray — Time stamps in seconds, same length as motion.

Source code in src/maai/util.py
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
def generate_natural_nod(
    range_rad: float,
    count: int,
    use_pre_rise: bool,
    velocity: float,
    fps: int = 30,
    decay_rate: float = 0.6,
    pre_rise_ratio: float = 0.8,
) -> tuple[np.ndarray, np.ndarray]:
    """Build a nodding motion as a pitch curve (radians) over time (seconds).

    A keyframe sequence is laid out first: rest, an optional upward pre-rise,
    the first dip to ``-|range_rad|``, back to rest, then ``count - 1``
    further dips whose depth is multiplied by ``decay_rate`` each time. Every
    segment between keyframes is given a whole number of frames so that the
    complete motion lasts ``total travelled angle / velocity`` seconds, with
    later nods slowed by ``decay_rate ** (nod_index / 2)``. The per-frame
    values are then produced by ``_nod_motion_cubic_spline_or_cosine``.

    Parameters
    ----------
    range_rad : float
        Nod depth in radians; the sign is ignored.
    count : int
        Number of nods; values below 1 are treated as 1.
    use_pre_rise : bool
        Whether a pre-rise keyframe precedes the first nod.
    velocity : float
        Target average angular velocity (rad/s). Values at or below ``1e-9``
        give a total duration of zero, so every moving segment gets the
        minimum of one frame.
    fps : int
        Output frame rate; values below 1 are treated as 1.
    decay_rate : float
        Amplitude multiplier applied for each nod after the first.
    pre_rise_ratio : float
        Pre-rise amplitude as a ratio of *range_rad*.

    Returns
    -------
    motion : np.ndarray
        Pitch values, one per frame.
    time_axis : np.ndarray
        Time stamps in seconds, same length as *motion*.
        When no segment receives any frame (e.g. zero amplitude), both arrays
        are ``np.array([0.0])``.
    """
    nod_count = max(1, int(count))
    frame_rate = max(1, int(fps))

    # --- keyframe values -------------------------------------------------
    amplitude = abs(float(range_rad))
    pitch_keys: list[float] = [0.0]
    if use_pre_rise:
        pitch_keys.append(amplitude * float(pre_rise_ratio))
    pitch_keys += [-amplitude, 0.0]

    for _ in range(nod_count - 1):
        amplitude *= float(decay_rate)
        pitch_keys += [-amplitude, 0.0]

    # --- per-segment travel distance and speed scaling --------------------
    segment_lengths: list[float] = [
        abs(end - start) for start, end in zip(pitch_keys, pitch_keys[1:])
    ]
    speed_factors: list[float] = []
    for seg, length in enumerate(segment_lengths):
        if length < 1e-6:
            # Stationary segment: the factor is irrelevant, keep it neutral.
            speed_factors.append(1.0)
            continue
        if use_pre_rise:
            # The first nod spans three segments (rise, dip, return).
            nod_number = 0 if seg < 3 else 1 + (seg - 3) // 2
        else:
            nod_number = seg // 2
        speed_factors.append(float(decay_rate) ** (nod_number * 0.5))

    total_distance = sum(segment_lengths)
    weighted_total = sum(
        length / factor for length, factor in zip(segment_lengths, speed_factors)
    )
    speed = float(velocity)
    total_time = total_distance / speed if speed > 1e-9 else 0.0

    # --- frame index of every keyframe ------------------------------------
    frame_marks: list[int] = [0]
    for length, factor in zip(segment_lengths, speed_factors):
        step = 0
        if weighted_total > 1e-9 and length >= 1e-6:
            seconds = (length / factor) * total_time / weighted_total
            # A moving segment always occupies at least one frame.
            step = max(1, int(round(seconds * frame_rate)))
        frame_marks.append(frame_marks[-1] + step)

    frame_total = frame_marks[-1]
    key_times = [mark / frame_rate for mark in frame_marks]

    if frame_total <= 0:
        return np.array([0.0]), np.array([0.0])

    time_axis = np.arange(frame_total) / frame_rate
    motion = _nod_motion_cubic_spline_or_cosine(
        key_times,
        pitch_keys,
        frame_marks,
        time_axis,
    )
    return motion, time_axis

get_available_models()

Retrieve a dictionary of available pre-trained models from the Hugging Face hub.

Returns:

Type Description

Dict[str, List[str]]: A mapping of repository IDs to their available model files.

Source code in src/maai/util.py
269
270
271
272
273
274
275
276
277
278
279
def get_available_models():
    """List the ``.pt`` files available in every known model repository.

    Every repository ID in ``repo_ids`` is queried with ``list_repo_files``;
    only file names ending in ``".pt"`` are kept.

    Returns:
        Dict[str, List[str]]: Repository ID mapped to its ``.pt`` file names.
    """
    return {
        repo_id: [name for name in list_repo_files(repo_id) if name.endswith(".pt")]
        for repo_id in repo_ids.values()
    }

load_vap_model(mode, frame_rate, context_len_sec, language='jp', device='cpu', cache_dir=None, force_download=False, model_type='normal')

Load a pretrained VAP model from the Hugging Face hub.

Parameters:

Name Type Description Default
mode str

The operational mode of the model (e.g., 'vap', 'vap_mc', 'bc', 'nod').

required
frame_rate float

The frame rate expected by the model.

required
context_len_sec float

The context length in seconds.

required
language str

The language identifier for the model (e.g., 'jp', 'en').

'jp'
device str

The device to load the model onto ('cpu', 'cuda').

'cpu'
cache_dir str

Directory to cache the downloaded model.

None
force_download bool

If True, forces download even if cached.

False
model_type str

The general model architecture type.

'normal'

Returns:

Type Description

Dict[str, Any]: The loaded state dictionary of the model.

Source code in src/maai/util.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# Lookup table for the pretrained checkpoints.
#   mode -> (file-name prefix,
#            whether the encoder suffix is inserted after the prefix,
#            {language: (key into ``repo_ids``, language tag in the file name)})
# The insertion order of modes and languages is the order in which they are
# listed in the "supported ..." error messages.
_VAP_MODEL_TABLE = {
    "vap": ("vap", True, {
        "jp": ("vap_jp", "jp"),
        "en": ("vap_en", "en"),
        "ch": ("vap_ch", "ch"),
        "tri": ("vap_tri", "tri_ecj"),
        "jp_kyoto": ("vap_jp_kyoto", "jp_kyoto"),
        "en_kyoto": ("vap_en_kyoto", "en_kyoto"),
        "ch_kyoto": ("vap_ch_kyoto", "ch_kyoto"),
        "tri_kyoto": ("vap_tri_kyoto", "tri_kyoto"),
        "ca": ("vap_ca", "ca"),
        "fr": ("vap_fr", "fr"),
    }),
    "vap_mc": ("vap_mc", True, {
        "jp": ("vap_mc_jp", "jp"),
        "en": ("vap_mc_en", "en"),
        "ch": ("vap_mc_ch", "ch"),
        "tri": ("vap_mc_tri", "tri"),
        "jp_kyoto": ("vap_mc_jp_kyoto", "jp_kyoto"),
        "en_kyoto": ("vap_mc_en_kyoto", "en_kyoto"),
        "ch_kyoto": ("vap_mc_ch_kyoto", "ch_kyoto"),
        "tri_kyoto": ("vap_mc_tri_kyoto", "tri_kyoto"),
        "fr": ("vap_mc_fr", "fr"),
    }),
    "bc": ("vap-bc", True, {
        "jp": ("vap_bc_jp", "jp"),
        "en": ("vap_bc_en", "en"),
        "ch": ("vap_bc_ch", "ch"),
        "tri": ("vap_bc_tri", "tri"),
    }),
    # Only "jp" is wired up for the two-type backchannel model.
    "bc_2type": ("vap-bc-2type", True, {
        "jp": ("vap_bc_2type_jp", "jp"),
    }),
    # Both nod languages use the "erica" tag in the file name.
    "nod": ("vap-nod", True, {
        "jp": ("vap_nod_jp", "erica"),
        "en": ("vap_nod_en", "erica"),
    }),
    "vap_prompt": ("vap_prompt", True, {
        "jp": ("vap_prompt_jp", "jp"),
    }),
    # The nod_para file name never carries an encoder suffix.
    "nod_para": ("vap-nod_para", False, {
        "jp": ("vap_nod_para_jp", "erica"),
    }),
}


def load_vap_model(mode: str, frame_rate: float, context_len_sec: float, language: str = "jp", device: str = "cpu", cache_dir: str | None = None, force_download: bool = False, model_type: str = "normal"):
    """Load a pretrained VAP model from the Hugging Face hub.

    The checkpoint file name is assembled as
    ``{prefix}{encoder_suffix}_state_dict_{tag}_{frame_rate}hz_{context}msec.pt``
    where the prefix and tag come from ``mode`` and ``language``, the encoder
    suffix is ``"_mimi"`` for the ``mimi`` encoder and empty for ``cpc``, and
    the context length is converted to whole milliseconds. The file is fetched
    with ``hf_hub_download`` and deserialized with ``torch.load``.

    Args:
        mode (str): The operational mode of the model. One of 'vap', 'vap_mc',
            'bc', 'bc_2type', 'nod', 'vap_prompt', 'nod_para'.
        frame_rate (float): The frame rate expected by the model.
        context_len_sec (float): The context length in seconds.
        language (str): The language identifier for the model (e.g., 'jp', 'en').
            The accepted values depend on ``mode``.
        device (str): The device to load the model onto ('cpu', 'cuda').
        cache_dir (str, optional): Directory to cache the downloaded model.
        force_download (bool): If True, forces download even if cached.
        model_type (str): The general model architecture type; resolved to an
            encoder type through ``resolve_encoder_type``.

    Returns:
        Dict[str, Any]: The loaded state dictionary of the model.

    Raises:
        ValueError: If ``model_type`` resolves to an encoder other than 'cpc'
            or 'mimi', if ``mode`` is unknown, if ``language`` is not
            available for ``mode``, or if the download fails (the original
            exception is attached as ``__cause__``).
    """
    frame_rate_label = _format_frame_rate(frame_rate)
    encoder_type = resolve_encoder_type(model_type)
    if encoder_type == "mimi":
        encoder_suffix = "_mimi"
    elif encoder_type == "cpc":
        encoder_suffix = ""
    else:
        raise ValueError(f"Unsupported encoder_type for pretrained model lookup: {encoder_type}")

    if mode not in _VAP_MODEL_TABLE:
        supported_modes = list(_VAP_MODEL_TABLE)
        raise ValueError(f"Invalid mode: {mode}. Supported modes are: {supported_modes}")

    file_prefix, uses_encoder_suffix, languages = _VAP_MODEL_TABLE[mode]
    if language not in languages:
        # Derived from the table, so the message only lists languages that
        # are actually accepted for this mode.
        supported_languages = list(languages)
        raise ValueError(f"Invalid language: {language}. Mode {mode} supports languages are: {supported_languages}")

    repo_key, language_tag = languages[language]
    repo_id = repo_ids[repo_key]
    if not uses_encoder_suffix:
        encoder_suffix = ""

    # Round rather than truncate: a product such as 1.005 * 1000 evaluates to
    # 1004.9999999999999 in binary floating point and would otherwise yield
    # a file name that is off by one millisecond.
    context_len_msec = int(round(context_len_sec * 1000))
    file_path = f"{file_prefix}{encoder_suffix}_state_dict_{language_tag}_{frame_rate_label}hz_{context_len_msec}msec.pt"

    try:
        checkpoint_path = hf_hub_download(repo_id=repo_id, filename=file_path, cache_dir=cache_dir, force_download=force_download)
    except Exception as e:
        # Any download failure is reported as an invalid model selection;
        # the underlying error stays reachable through exception chaining.
        raise ValueError(f"Invalid model: mode: {mode}, frame_rate: {frame_rate}, context_len_sec: {context_len_sec}, language: {language}. Run get_available_models() for available models.") from e

    # NOTE(review): torch.load unpickles the file unless weights_only=True is
    # in effect; the checkpoint is trusted only as far as the Hub repository
    # is. Consider passing weights_only=True if the supported torch versions
    # allow it.
    return torch.load(checkpoint_path, map_location=torch.device(device))

resolve_encoder_type(model_type='normal')

Resolve the encoder type based on the provided model type.

Parameters:

Name Type Description Default
model_type str

The type of model (e.g., 'normal', 'normal-ver2').

'normal'

Returns:

Name Type Description
str str

The corresponding encoder type ('cpc' or 'mimi').

Source code in src/maai/util.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def resolve_encoder_type(model_type: str = "normal") -> str:
    """Look up the encoder type registered for a model type.

    Args:
        model_type (str): A key of ``MODEL_TYPE_TO_ENCODER_TYPE``
            (e.g., 'normal', 'normal-ver2').

    Returns:
        str: The encoder type mapped to ``model_type`` ('cpc' or 'mimi').

    Raises:
        ValueError: If ``model_type`` is not a known key. The message lists
            the supported values and the ``KeyError`` is attached as the cause.
    """
    try:
        encoder_type = MODEL_TYPE_TO_ENCODER_TYPE[model_type]
    except KeyError as missing:
        supported_model_types = list(MODEL_TYPE_TO_ENCODER_TYPE.keys())
        message = f"Unsupported model_type: {model_type}. Supported model_type values are: {supported_model_types}"
        raise ValueError(message) from missing
    return encoder_type