Class HuggingFaceParser
Parses a Hugging Face JSON configuration and builds a Tokenizer.
Inherited Members
Namespace: Unity.InferenceEngine.Tokenization.Parsers.HuggingFace
Assembly: Unity.InferenceEngine.Tokenization.dll
Syntax
public class HuggingFaceParser
Methods
GetDefault()
Gets a parser configured with all the built-in component builders, plus any additional builders decorated with HfAttribute (with priority given to the built-ins).
Declaration
public static HuggingFaceParser GetDefault()
Returns
| Type | Description |
|---|---|
| HuggingFaceParser | A pre-configured parser. |
Parse(string)
Parses a Hugging Face JSON configuration and builds a Tokenizer.
Declaration
public ITokenizer Parse(string content)
Parameters
| Type | Name | Description |
|---|---|---|
| string | content | The Hugging Face JSON configuration to parse. |
Returns
| Type | Description |
|---|---|
| ITokenizer | The corresponding tokenizer. |
SetBuilder(string, Lazy<IComponentBuilder<IDecoder>>)
Defines a builder for the type of decoder.
Declaration
public void SetBuilder(string type, Lazy<IComponentBuilder<IDecoder>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the decoder. |
| Lazy<IComponentBuilder<IDecoder>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<IMapper>>)
Defines a builder for the type of tokenizer model (IMapper).
Declaration
public void SetBuilder(string type, Lazy<IComponentBuilder<IMapper>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the tokenizer model (IMapper). |
| Lazy<IComponentBuilder<IMapper>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<INormalizer>>)
Defines a builder for the type of normalizer.
Declaration
public void SetBuilder(string type, Lazy<IComponentBuilder<INormalizer>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the normalizer. |
| Lazy<IComponentBuilder<INormalizer>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<IPadding>>)
Defines a builder for the type of padding.
Declaration
public void SetBuilder(string strategy, Lazy<IComponentBuilder<IPadding>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | strategy | Strategy of the padding. |
| Lazy<IComponentBuilder<IPadding>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<IPostProcessor>>)
Defines a builder for the type of post processor.
Declaration
public void SetBuilder(string type, Lazy<IComponentBuilder<IPostProcessor>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the post processor. |
| Lazy<IComponentBuilder<IPostProcessor>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<IPreTokenizer>>)
Defines a builder for the type of pre-tokenizer.
Declaration
public void SetBuilder(string type, Lazy<IComponentBuilder<IPreTokenizer>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the pre-tokenizer. |
| Lazy<IComponentBuilder<IPreTokenizer>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, Lazy<IComponentBuilder<ITruncator>>)
Defines a builder for the type of truncator.
Declaration
public void SetBuilder(string strategy, Lazy<IComponentBuilder<ITruncator>> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | strategy | Strategy of the truncator. |
| Lazy<IComponentBuilder<ITruncator>> | builder | A lazy reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<IDecoder>)
Defines a builder for the type of decoder.
Declaration
public void SetBuilder(string type, IComponentBuilder<IDecoder> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the decoder. |
| IComponentBuilder<IDecoder> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<IMapper>)
Defines a builder for the type of tokenizer model (IMapper).
Declaration
public void SetBuilder(string type, IComponentBuilder<IMapper> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the tokenizer model (IMapper). |
| IComponentBuilder<IMapper> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<INormalizer>)
Defines a builder for the type of normalizer.
Declaration
public void SetBuilder(string type, IComponentBuilder<INormalizer> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the normalizer. |
| IComponentBuilder<INormalizer> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<IPadding>)
Defines a builder for the type of padding.
Declaration
public void SetBuilder(string strategy, IComponentBuilder<IPadding> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | strategy | Strategy of the padding. |
| IComponentBuilder<IPadding> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<IPostProcessor>)
Defines a builder for the type of post processor.
Declaration
public void SetBuilder(string type, IComponentBuilder<IPostProcessor> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the post processor. |
| IComponentBuilder<IPostProcessor> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<IPreTokenizer>)
Defines a builder for the type of pre-tokenizer.
Declaration
public void SetBuilder(string type, IComponentBuilder<IPreTokenizer> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | type | Type of the pre-tokenizer. |
| IComponentBuilder<IPreTokenizer> | builder | A reference of a builder for the component. |
SetBuilder(string, IComponentBuilder<ITruncator>)
Defines a builder for the type of truncator.
Declaration
public void SetBuilder(string strategy, IComponentBuilder<ITruncator> builder)
Parameters
| Type | Name | Description |
|---|---|---|
| string | strategy | Strategy of the truncator. |
| IComponentBuilder<ITruncator> | builder | A reference of a builder for the component. |