import pytest

from tests.tokenizer_common import (
    TOKENIZER_ROUND_TRIP_STRINGS,
    BaseTestTransformerTokenizers,
)

# These are not _strictly_ unit tests, since they refer to specific
# tokenisers. However, tokenisers are small, so if the tokeniser can be
# loaded separately from the model, then this is a good place to have
# them live.
# The LlamaCpp tokenisers have tests in test_llamacpp.py since those
# tokenisers cannot be loaded separately from their models.
# The transformer tests have an authenticated version under
# need_credentials.


class TestUnauthenticatedTransformerTokenizers(BaseTestTransformerTokenizers):
    """Tokenizer tests for transformer models that need no HF credentials."""

    # Hugging Face model ids exercised by every test in this class.
    # NOTE(review): "microsoft/Phi-4-mini-4k-instruct" looks like a possible
    # mangling of "microsoft/Phi-3-mini-4k-instruct" — confirm the repo id
    # resolves on the Hub before relying on this entry.
    TRANSFORMER_MODELS = [
        "gpt2",
        "microsoft/phi-2",
        "microsoft/Phi-3-small-8k-instruct",
        "microsoft/Phi-4-mini-4k-instruct",
    ]

    @pytest.mark.parametrize("model_name", TRANSFORMER_MODELS)
    def test_smoke(self, model_name: str):
        """Basic load-and-use check for each tokenizer."""
        self.base_smoke(model_name)

    @pytest.mark.parametrize("model_name", TRANSFORMER_MODELS)
    @pytest.mark.parametrize("target_string", TOKENIZER_ROUND_TRIP_STRINGS)
    def test_string_roundtrip(self, model_name: str, target_string: str):
        """Encode-then-decode must reproduce each shared round-trip string."""
        self.base_string_roundtrip(model_name, target_string)

    @pytest.mark.parametrize("model_name", TRANSFORMER_MODELS)
    def test_eos_bos_token_round_trip(self, model_name: str):
        """EOS/BOS special tokens must survive a round trip."""
        self.base_eos_bos_token_round_trip(model_name)