保存“微调”的 bert 模型

2024-04-05

我正在尝试保存一个经过微调的 bert 模型。我已经正确运行了代码 - 它工作正常，并且在 ipython 控制台中我可以调用 getPrediction 并让它产生结果。

我保存了权重文件（编号最高的是 model.ckpt-333.data-00000-of-00001）。

我不知道如何保存模型以供重复使用。

我正在使用 bert-tensorflow。

import json

import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime


from sklearn.model_selection import train_test_split
import os

# Report the installed TensorFlow and TF-Hub versions.
for banner, module in (
    ("tensorflow version : ", tf),
    ("tensorflow_hub version : ", hub),
):
    print(banner, module.__version__)


#Importing BERT modules
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization

# Output directory of the model (used as the Estimator's model_dir).
OUTPUT_DIR = 'model'

#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = False #@param {type:"boolean"}

if DO_DELETE:
    try:
        # Use the same tf.io.gfile API family as makedirs below.
        tf.io.gfile.rmtree(OUTPUT_DIR)
    except tf.errors.NotFoundError:
        # Nothing to delete on a first run; any other failure (permissions,
        # I/O errors) is allowed to surface instead of being swallowed.
        pass

tf.io.gfile.makedirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


### Load the data
data = pd.read_csv("data/bbc-text.csv")
data.columns = ['category', 'text']
print(f'*****Data Loaded: {data.head()} *****')

# Show every row that contains at least one null value.
rows_with_nulls = data[data.isnull().any(axis=1)]
print(f'*****Empty Data: {rows_with_nulls} *****')

# Encode the category column as integer codes in a new 'code' column.
data['category'] = pd.Categorical(data['category'])
data['code'] = data['category'].cat.codes

from sklearn.model_selection import train_test_split

# 80/20 split with a fixed seed so the split is reproducible.
train, test = train_test_split(data, test_size=0.2, random_state=200)

## 2 -- Data Visualisation

# Column names and label ids used by the preprocessing steps further down.
DATA_COLUMN = 'text'
LABEL_COLUMN = 'code'
label_list = [0, 1, 2, 3, 4]

print(data.code.unique())

import matplotlib.pyplot as plt

# Bar chart of how many training rows fall into each class code.
class_counts = train['code'].value_counts()
class_counts.plot(kind='bar')
plt.show()

## 2 -- Data Preprocessing

def _row_to_input_example(row):
    """Wrap one DataFrame row as a single-sentence BERT InputExample."""
    return bert.run_classifier.InputExample(
        guid=None,
        text_a=row[DATA_COLUMN],
        text_b=None,
        label=row[LABEL_COLUMN])

# Row-wise conversion of both splits (axis=1 passes each row to the helper).
train_InputExamples = train.apply(_row_to_input_example, axis=1)

test_InputExamples = test.apply(_row_to_input_example, axis=1)

# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
    """Build a FullTokenizer from the Hub module's vocab file and casing flag."""
    # A throwaway graph keeps the lookup ops out of the default graph.
    with tf.Graph().as_default():
        hub_module = hub.Module(BERT_MODEL_HUB)
        info = hub_module(signature="tokenization_info", as_dict=True)
        fetches = [info["vocab_file"], info["do_lower_case"]]
        with tf.compat.v1.Session() as session:
            vocab_path, lower_case = session.run(fetches)

    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_path, do_lower_case=lower_case)

tokenizer = create_tokenizer_from_hub_module()

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128

# Convert our train and validation examples to InputFeatures that BERT understands.
to_features = bert.run_classifier.convert_examples_to_features
train_features = to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

test_features = to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)


# Show the first training observation at each stage of preprocessing.
first_example = train_InputExamples.iloc[0]
first_features = train_features[0]
separator = "-"*30

print("Example of train[0] as a training set")
print("Sentence : ", first_example.text_a)
print(separator)
print("Tokens : ", tokenizer.tokenize(first_example.text_a))
print(separator)
print("Input IDs : ", first_features.input_ids)
print(separator)
print("Input Masks : ", first_features.input_mask)
print(separator)
print("Segment IDs : ", first_features.segment_ids)


## 3. Creating a Multiclass Classifier
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Build the classification graph: BERT Hub encoder plus a linear softmax head.

    Args:
      is_predicting: True when the graph is built for inference. Dropout and
        the loss are skipped in that case.
      input_ids: Token-id tensor fed to the Hub module's "tokens" signature.
      input_mask: Attention-mask tensor fed to the same signature.
      segment_ids: Segment-id tensor fed to the same signature.
      labels: Integer class ids; one-hot encoded to compute the loss. Not used
        when `is_predicting` is True.
      num_labels: Number of output classes.

    Returns:
      `(predicted_labels, log_probs)` when `is_predicting` is True, otherwise
      `(loss, predicted_labels, log_probs)`.
    """
    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

    # Task-specific classification layer trained on top of the pooled output.
    output_weights = tf.compat.v1.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.compat.v1.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.compat.v1.variable_scope("loss"):

        # Dropout helps prevent overfitting, but must not run at inference
        # time or identical inputs give different predictions.
        # NOTE: this function cannot tell TRAIN from EVAL, so evaluation still
        # runs with dropout enabled.
        if not is_predicting:
            output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # argmax over the class axis already yields one label per example.
        # No squeeze here: it would collapse a batch of one into a scalar and
        # break per-example iteration over the predictions.
        predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

        # If we're predicting, we want predicted labels and the log-probabilities.
        if is_predicting:
            return (predicted_labels, log_probs)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)

#A function that adapts our model to work for training, evaluation, and prediction.

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Return a `model_fn` closure for `tf.estimator.Estimator`.

    Args:
      num_labels: Number of output classes, forwarded to `create_model`.
      learning_rate: Learning rate forwarded to the BERT optimizer.
      num_train_steps: Total training steps forwarded to the optimizer.
      num_warmup_steps: Warmup steps forwarded to the optimizer.

    Returns:
      A function `model_fn(features, labels, mode, params)` that returns an
      `EstimatorSpec` for TRAIN, EVAL or PREDICT.
    """
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """Build the `EstimatorSpec` for the requested mode."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        # Labels travel inside `features`; the `labels` argument is unused.
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # PREDICT
        if is_predicting:
            (predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids,
                num_labels)

            predictions = {
                # NOTE: despite the key name these are log-probabilities.
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        # TRAIN and EVAL
        (loss, predicted_labels, log_probs) = create_model(
            is_predicting, input_ids, input_mask, segment_ids, label_ids,
            num_labels)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # Only build the optimizer when it is actually going to be used.
            train_op = bert.optimization.create_optimizer(
                loss, learning_rate, num_train_steps, num_warmup_steps,
                use_tpu=False)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op)

        # Calculate evaluation metrics (EVAL only).
        def metric_fn(label_ids, predicted_labels):
            accuracy = tf.compat.v1.metrics.accuracy(label_ids, predicted_labels)
            # NOTE(review): the four confusion-count metrics below look
            # designed for binary labels; with more than two classes their
            # values are presumably not meaningful -- confirm before relying
            # on them.
            true_pos = tf.compat.v1.metrics.true_positives(
                label_ids,
                predicted_labels)
            true_neg = tf.compat.v1.metrics.true_negatives(
                label_ids,
                predicted_labels)
            false_pos = tf.compat.v1.metrics.false_positives(
                label_ids,
                predicted_labels)
            false_neg = tf.compat.v1.metrics.false_negatives(
                label_ids,
                predicted_labels)

            return {
                "eval_accuracy": accuracy,
                "true_positives": true_pos,
                "true_negatives": true_neg,
                "false_positives": false_pos,
                "false_negatives": false_neg
                }

        eval_metrics = metric_fn(label_ids, predicted_labels)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          eval_metric_ops=eval_metrics)

    # Return the actual model function in the closure
    return model_fn

# Training hyperparameters.
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 300
SAVE_SUMMARY_STEPS = 100

# Compute train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Specify output directory and number of checkpoint steps to save
# (built once; the original constructed an identical RunConfig twice).
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)


#Initializing the model and the estimator
model_fn = model_fn_builder(
    num_labels=len(label_list),
    learning_rate=LEARNING_RATE,
    num_train_steps=num_train_steps,
    num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    config=run_config,
    params={"batch_size": BATCH_SIZE})

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

# Create an input function for validating. drop_remainder = True for using TPUs.
test_input_fn = bert.run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)


# #Training the model
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

#Evaluating the model with Validation set
# `accuracy` receives whatever evaluate() returns for the whole test set
# (steps=None runs until the input is exhausted).
accuracy = estimator.evaluate(input_fn=test_input_fn, steps=None)


# A method to get predictions
def getPrediction(in_sentences):
    """Classify sentences with the fine-tuned estimator.

    Args:
        in_sentences: A sentence or an iterable of sentences. A single `str`
            is treated as one sentence instead of being iterated character by
            character.

    Returns:
        A list of `(sentence, log_probs, label_id, label_name)` tuples in
        input order, where `log_probs` is the estimator's 'probabilities'
        output. Returns an empty list for empty input.
    """
    # A list to map the actual labels to the predictions
    labels = ["business", "entertainment", "politics", "sports", "tech"]

    if isinstance(in_sentences, str):
        in_sentences = [in_sentences]
    # Materialize once so one-shot iterables survive being read twice below.
    sentences = list(in_sentences)
    if not sentences:
        # Nothing to classify; skip building an input pipeline on no data.
        return []

    # Transforming the input into BERT accepted form (label=0 is a dummy value)
    input_examples = [
        run_classifier.InputExample(guid="", text_a=sentence, text_b=None, label=0)
        for sentence in sentences
    ]

    # Creating input features for the sentences
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)

    # Predicting the classes
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features, seq_length=MAX_SEQ_LENGTH,
        is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)

    return [
        (sentence, prediction['probabilities'], prediction['labels'],
         labels[prediction['labels']])
        for sentence, prediction in zip(sentences, predictions)
    ]
pred_sentences = list(test['text'])

predictions = getPrediction(pred_sentences)

# Each prediction tuple is (sentence, probabilities, label id, label name).
enc_labels = [row[2] for row in predictions]
act_labels = [row[3] for row in predictions]

# Write the predicted label ids to an Excel sheet.
submission = pd.DataFrame(enc_labels, columns=['category'])
submission.to_excel('data/submission_bert.xlsx', index=False)

## Random tester
#Classifying random sentences
random_sentences = [
    'Mr.Modi is the Indian Prime Minister',
    'Gaming machines are powered by efficient micro processores and GPUs',
    'That HBO TV series is really good',
    'A trillion dollar economy ',
]
tests = getPrediction(random_sentences)

既然问题明确要求保存模型，可以按如下方式保存和加载（注意：下面的写法适用于 PyTorch 模型；对于上文基于 tf.estimator 的模型，应改用 estimator.export_saved_model 导出）：

import torch
torch.save(model, 'path/to/model')

saved_model = torch.load('path/to/model')

本文内容由网友自发贡献，版权归原作者所有，本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容，请联系:hwhale#tublm.com(使用前将#替换为@)

python

tensorflow

machinelearning

machinelearningmodel

保存“微调”的 bert 模型的相关文章

Pycharm Python 控制台不打印输出

我有一个从 Pycharm python 控制台调用的函数但没有显示输出 In 2 def problem1 6 for i in range 1 101 2 print i end In 3 problem1 6 In 4 另一方面像
如何使用 Scrapy 从网站获取所有纯文本？

我希望在 HTML 呈现后可以从网站上看到所有文本我正在使用 Scrapy 框架使用 Python 工作和xpath body text 我能够获取它但是带有 HTML 标签而且我只想要文本有什么解决办法吗最简单的选择是ext
为 pandas 数据透视表中的每个值列定义 aggfunc

试图生成具有多个值列的数据透视表我知道我可以使用 aggfunc 按照我想要的方式聚合值但是如果我不想对两列求和或求平均值而是想要一列的总和同时求另一列的平均值该怎么办那么使用 pandas 可以做到这一点吗 df pd D
从 scikit-learn 导入 make_blobs [重复]

这个问题在这里已经有答案了我收到下一个警告 D Programming Python ML venv lib site packages sklearn utils deprecation py 77 DeprecationWarning
在循环中每次迭代开始时将变量重新分配给原始值（在循环之前定义）

在Python中你使用在每次迭代开始时将变量重新分配给原始值在循环之前定义时也就是说 original 1D o o o for i in range 0 3 new original 1D revert back to orig
从列表中的数据框列中搜索部分字符串匹配 - Pandas - Python

我有一个清单 things A1 B2 C3 我有一个 pandas 数据框其中有一列包含用分号分隔的值某些行将包含与上面列表中的一项的匹配它不会是完美的匹配因为它在其中包含字符串的其他部分该列例如该列中的一行可能有哇这里
在pyyaml中表示具有相同基类的不同类的实例

我有一些单元测试集希望将每个测试运行的结果存储为 YAML 文件以供进一步分析 YAML 格式的转储数据在几个方面满足我的需求但测试属于不同的套装结果有不同的父类这是我所拥有的示例 gt gt gt rz shorthand for
Abaqus 将曲面转化为集合

我一直试图在模型中找到两个表面的中心参见照片但未能成功它们是元素表面面查询中没有选项可以查找元素表面的中心只能查找元素集的中心找到节点集的中心也很好但是我的节点集没有出现在工具 gt 查询 gt 质量属性选项中而且我找不到
在tensorflow.js中对张量进行分区、屏蔽或过滤

我有 2 个相同长度的张量 data and groupIds 我想分开data通过相应的值分成几组groupId 例如 const data tf tensor 1 2 3 4 5 const groupIds tf tensor 0 1
Python：尝试检查有效的电话号码

我正在尝试编写一个接受以下格式的电话号码的程序XXX XXX XXXX并将条目中的任何字母翻译为其相应的数字现在我有了这个如果启动不正确它将允许您重新输入正确的数字然后它会翻译输入的原始数字我该如何解决 def main phon
Python - 在窗口最小化或隐藏时使用 pywinauto 控制窗口

我正在尝试做的事情我正在尝试使用 pywinauto 在 python 中创建一个脚本以在后台自动安装 notepad 隐藏或最小化 notepad 只是一个示例因为我将编辑它以与其他软件一起使用 Problem 问题是我想在安装程序
Numpy 优化

我有一个根据条件分配值的函数我的数据集大小通常在 30 50k 范围内我不确定这是否是使用 numpy 的正确方法但是当数字超过 5k 时它会变得非常慢有没有更好的方法让它更快 import numpy as np N 5000
如何在 Django 中使用并发进程记录到单个文件而不使用独占锁

给定一个在多个服务器上同时执行的 Django 应用程序该应用程序如何记录到单个共享日志文件在网络共享中而不保持该文件以独占模式永久打开当您想要利用日志流时这种情况适用于 Windows Azure 网站上托管的 Django 应
Python：计算字典的重复值

我有一本字典如下 dictA unit1 test1 alpha unit1 test2 beta unit2 test1 alpha unit2 test2 gamma unit3 test1 delta unit3 test2 gamm
glpk.LPX 向后兼容性？

较新版本的glpk没有LPXapi 旧包需要它我如何使用旧包例如COBRA http opencobra sourceforge net openCOBRA Welcome html 与较新版本的glpk 注意COBRA适用于 MATL
对输入求 Keras 模型的导数返回全零

所以我有一个 Keras 模型我想将模型的梯度应用于其输入这就是我所做的 import tensorflow as tf from keras models import Sequential from keras layers imp
在python中，如何仅搜索所选子字符串之前的一个单词

给定文本文件中的长行列表我只想返回紧邻其前面的子字符串例如单词狗描述狗的单词例如假设有这些行包含狗 hotdog big dog is dogged dog spy with my dog brown dogs 在这种情况下期望
如何使用google colab在jupyter笔记本中显示GIF？

我正在使用 google colab 想嵌入一个 gif 有谁知道如何做到这一点我正在使用下面的代码它并没有在笔记本中为 gif 制作动画我希望笔记本是交互式的这样人们就可以看到代码的动画效果而无需运行它我发现很多方法在 Goo
Spark.read 在 Databricks 中给出 KrbException

我正在尝试从 databricks 笔记本连接到 SQL 数据库以下是我的代码 jdbcDF spark read format com microsoft sqlserver jdbc spark option url jdbc sql
Python：元类属性有时会覆盖类属性？

下面代码的结果让我感到困惑 class MyClass type property def a self return 1 class MyObject object metaclass MyClass a 2 print MyObject

随机推荐

如何处理 Spring ProviderManager 中抛出的 spring security InternalAuthenticationServiceException

ProviderManager 在 DaoAuthenticationProvider class 中检索用户时抛出 InternalAuthenticationServiceException class loadedUser this
Typescript Jest 模拟：xx.default 不是构造函数：无法实例化模拟

我在尝试模拟类和构造函数时遇到问题我有一个要测试的 App ts 类 class App public server Express constructor this server new Express this server init
有选择地删除上传到 Azure blob 的图像（Django/Python 项目）

在 Django Python 项目中我使用 Azure blob 来存储用户上传的照片代码如下 from azure storage blob import BlobService blob service BlobService a
Maven 依赖项未下载，代理问题？

当前 WinHTTP 代理设置在 Windows 计算机中直接访问无代理服务器并在 settings xml 中作为但是 jar 没有被下载并出现以下错误 ERROR Plugin org apache maven plugins
使用 Rhohub 构建的 .apk 文件无法在设备上运行

在我的设备中安装 apk 文件后当我尝试打开它时它说不幸的是应用程序已停止它在我的 Rhomobile 的 Android Rhosimulator 上运行良好但我不明白为什么它这么说更新今天我在本地计算机上构建了 apk 文
正则表达式/“正则表达式”中的正则是什么意思？

正则表达式中的正则是什么意思我听说正则表达式曾经是常规的但现在不再了 The regular正则表达式中来自于它匹配常规语言 http en wikipedia org wiki Regular language 的概念形式语言理
pthread_create 中的多个参数

根据 pthread create 手册页该函数的参数是 int pthread create pthread t thread const pthread attr t attr void start routine void void
如何在 Micronaut 应用程序中指定配置文件？

我想根据我将部署应用程序的环境指定不同类型的配置就像在Spring boot中的yml文件中我们可以设置配置文件一样我想知道是否有办法在Micronaut中做到这一点您可以通过以下方式设置活动环境系统属性 micronaut envi
更改占位符文本

如何更改输入元素的占位符文本例如我有 3 个文本类型的输入
如何将 GitHub wiki 存储为源代码的一部分

GitHub 以及许多git服务器例如 GitLab 提供项目级 wiki 其中通常包含 markdown md 文件被存储并形成好吧你的项目的wiki 这将是so cool如果有一种方法可以将您的 wiki 存储为主项目源代码的一部
使用 jQuery 进行 .NET 日期验证

我需要对日期进行一些基本验证例如验证日期是否大于今天验证日期是否真实即99 99 9999 等基本上我需要模拟 NET 的用途DateTime TryParse jQuery 中有类似的东西吗或者我最好进行 AJAX 调用来验证
socket.io 解析连接 (>= 2.4.1) 签名的会话 cookie

使用最新版本的 connect 截至 2012 年 7 月 26 日我找到了以下方法来从 socket io 获取可与 connect redis 存储一起使用的会话 ID var express require express rout
ARKit - 获取相机到锚点的距离

我正在创建一个锚点并将其添加到相机前面一定距离处的 ARSKView 中如下所示 func displayToken distance Float print token dropped at distance guard let sce
Android 中的静态单例生命周期

我有一些不清楚的情况最后一个持有 Activity 的引用被销毁后静态单例会被垃圾回收吗因为Application中没有更多对单例实例的引用那么我可以依赖单身人士吗由官方提供安卓文档 http developer android
Firefox 中的 SVG 过滤器

由于某些原因我无法让我的 SVG 过滤器在 Firefox 中工作然而它们在 Opera 中工作得很好我将其属性设置为过滤器的元素就消失了这很奇怪这是我的 JavaScript 代码 defsElement SVGDoc crea
禁用在发布设置上将诊断数据发送到应用程序见解

我在 vs2013 上创建了一个 Web 角色最近将其升级到 vs2015 将角色发布到 Azure 时 send diagnostics data to application insights已打开由于我不想每次都使用应用程序见解
在 Scalatest 失败时执行专门的功能

我正在使用 selenium 在 Scala Web 应用程序上执行集成测试每当测试失败时我想将页面的 html 截图或打印到控制台中我当前的设置是使用 Selenium 2 0 和 Spec 进行 Scalatest 是否有办法通过
迁移php4/mysql4到php5/mysql5：切换到InnoDB？

我有一个遗留的Web应用程序php4 mysql4 MyISAM 数据库包含一些cms 一些用户数据一些日历应用程序现在我要迁移到带有 php5 mysql5 的新服务器迁移mysql数据库时是否应该更改为InnoDB 预期的优点缺
使用正则表达式和 php 删除除 Internet Explorer 注释之外的所有 html 注释

我是正则表达式新手但需要一个可以删除所有 html 注释的代码但不是像我有这个代码第369章
保存“微调”的 bert 模型

我正在尝试保存一个经过微调的 bert 模型我已经正确运行了代码它工作正常并且在 ipython 控制台中我可以调用 getPrediction 并让它产生结果我保存了权重文件最高的是 model ckpt 333 data 00

保存“微调”的 bert 模型

保存“微调”的 bert 模型 的相关文章

随机推荐

热门标签

保存“微调”的 bert 模型的相关文章