Metadata-Version: 2.1
Name: transformers-model
Version: 0.0.2
Summary: Model hub for transformers.
Home-page: https://gitee.com/summry/torch-model-hub
Author: summy
Author-email: fkfkfk2024@2925.com
License: UNKNOWN
Keywords: Pytorch,AI,Machine learning,Deep learning,Bert,llm,transformers
Platform: UNKNOWN
Requires-Python: >=3.6
Description-Content-Type: text/x-rst

Usage Sample
''''''''''''

.. code:: python

        import pandas as pd
        from sklearn.model_selection import train_test_split
        import torch
        from transformers import BertTokenizer
        from nlpx.tokenize.utils import get_df_text_labels
        from nlpx.dataset import TextDataset, text_collate
        from transformers_model import AutoCNNTextClassifier, AutoCNNTokenClassifier,BertDataset, BertCollator, BertTokenizeCollator
        from nlpx.model.wrapper import ClassifyModelWrapper
        
        ######################## AutoCNNTextClassifier classification ##########################
        # Placeholder: name or path of the pretrained BERT checkpoint -- replace with your own.
        pretrained_path = 'path/to/pretrained-bert'
        # One token length shared by the tokenizer calls below, so the training
        # and test splits are padded/truncated identically.
        max_length = 60

        # Placeholders: replace with your own class names, samples and label ids.
        # `texts` and `labels` are split together below, so they must have the same length.
        classes = ['class1', 'class2', 'class3', ...]
        texts = [[str],]
        labels = [0, 0, 1, 2, 1, ...]
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        train_texts, test_texts, y_train, y_test = train_test_split(texts, labels, test_size=0.2, random_state=42)

        train_set = TextDataset(train_texts, y_train)
        test_set = TextDataset(test_texts, y_test)
        model = AutoCNNTextClassifier(pretrained_path, len(classes), device)
        wrapper = ClassifyModelWrapper(model, classes, device)
        _ = wrapper.train(train_set, test_set, collate_fn=text_collate)

        ######################### AutoCNNTokenClassifier classification ##########################
        tokenizer = BertTokenizer.from_pretrained(pretrained_path)

        ###################################### BertCollator ######################################
        # Option 1: tokenize both splits up front and wrap the encodings in BertDataset.
        # The keyword arguments are shared so both splits are encoded the same way.
        encode_kwargs = dict(
            max_length=max_length,
            padding="max_length",
            truncation=True,
            return_token_type_ids=True,
            return_attention_mask=True,
            return_tensors="pt",
        )
        train_encodings = tokenizer.batch_encode_plus(train_texts, **encode_kwargs)
        test_encodings = tokenizer.batch_encode_plus(test_texts, **encode_kwargs)

        train_set = BertDataset(train_encodings, y_train)
        test_set = BertDataset(test_encodings, y_test)

        model = AutoCNNTokenClassifier(pretrained_path, len(classes), device)
        wrapper = ClassifyModelWrapper(model, classes, device)
        _ = wrapper.train(train_set, test_set, collate_fn=BertCollator())

        ################################ BertTokenizeCollator ################################
        # Option 2: keep the raw texts in the dataset and hand the tokenizer to the collator.
        train_set = TextDataset(train_texts, y_train)
        test_set = TextDataset(test_texts, y_test)
        model = AutoCNNTokenClassifier(pretrained_path, len(classes), device)
        wrapper = ClassifyModelWrapper(model, classes, device)
        # NOTE(review): the second argument is presumably the maximum token length -- confirm
        # against the BertTokenizeCollator signature.
        _ = wrapper.train(train_set, test_set, collate_fn=BertTokenizeCollator(tokenizer, max_length))


