vocab : adopt leading TemplateProcessing special token as BOS (#24428)

This commit is contained in:
o7si 2026-06-11 15:37:23 +08:00 committed by GitHub
parent 68f30663cf
commit 1bfbdb134e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -256,6 +256,11 @@ class SpecialVocab:
if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'):
if not tokenizer_config:
special_bos = special_first
elif special_first not in (special_bos, special_cls):
if not special_bos:
tokenizer_config['bos_token'] = special_bos = special_first
if not special_cls:
tokenizer_config['cls_token'] = special_cls = special_first
self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False
if special_first not in (special_bos, special_cls):
logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing<single>')