Customize Multi Output Model
It is very easy to customize your own multi-output model. Let's assume you have a dataset like this, with one input and two outputs.
Example code is in the file tests/test_custom_multi_output_classification.py.
# Input: five tokenized sentences, one list of single-character tokens per sample.
x = [
    ['我', '想', '全', '部', '回', '复'],
    ['我', '想', '上', 'q', 'q', '了'],
    ['你', '去', '过', '赌', '场', '吗'],
    ['是', '我', '是', '说', '你', '有', '几', '个', '兄', '弟', '姐', '妹', '不', '是', '你', '自', '己', '说'],
    ['广', '西', '新', '闻', '网'],
]

# First target: one one-hot row (3 classes) per sample in `x`.
output_1 = [
    [0., 0., 1.],
    [1., 0., 0.],
    [1., 0., 0.],
    [0., 0., 1.],
    [1., 0., 0.],
]

# Second target: one one-hot row (3 classes) per sample in `x`.
output_2 = [
    [0., 1., 0.],
    [0., 0., 1.],
    [0., 0., 1.],
    [1., 0., 0.],
    [0., 0., 1.],
]
Then you need to create a customized processor that inherits from ClassificationProcessor.
import kashgari
import numpy as np
from typing import Tuple, List, Optional, Dict, Any
from kashgari.processors.classification_processor import ClassificationProcessor
class MultiOutputProcessor(ClassificationProcessor):
    """Label processor for a model that has several label datasets, one per output."""

    def process_y_dataset(self,
                          data: Tuple[List[List[str]], ...],
                          maxlens: Optional[Tuple[int, ...]] = None,
                          subset: Optional[List[int]] = None) -> Tuple[np.ndarray, ...]:
        """Turn every label dataset into a NumPy array, optionally sub-sampled.

        The labels are taken as-is (they are expected to be one-hot encoded
        already), so the only work done here is selecting the requested subset.

        Args:
            data: Sequence of label datasets, one entry per model output.
            maxlens: Not read by this implementation.
            subset: Indices handed to ``kashgari.utils.get_list_subset``;
                ``None`` keeps each dataset unchanged.

        Returns:
            The single array when exactly one dataset was given, otherwise a
            tuple holding one array per dataset.
        """
        def pick(labels):
            # No subset requested: use the dataset unchanged.
            if subset is None:
                return labels
            return kashgari.utils.get_list_subset(labels, subset)

        arrays = [np.array(pick(labels)) for labels in data]
        return arrays[0] if len(arrays) == 1 else tuple(arrays)
Then build your own model that inherits from BaseClassificationModel.
import kashgari
import tensorflow as tf
from typing import Tuple, List, Optional, Dict, Any
from kashgari.layers import L
from kashgari.tasks.classification.base_model import BaseClassificationModel
class MultiOutputModel(BaseClassificationModel):
    """Bidirectional-LSTM classifier with two 3-unit sigmoid output layers."""

    @classmethod
    def get_default_hyper_parameters(cls) -> Dict[str, Dict[str, Any]]:
        """Return the default keyword arguments, keyed by layer name."""
        bi_lstm_kwargs = {
            'units': 256,
            'return_sequences': False
        }
        return {'layer_bi_lstm': bi_lstm_kwargs}

    # Build your own model
    def build_model_arc(self):
        """Assemble the network and store it in ``self.tf_model``.

        The embedding model's output feeds one bidirectional LSTM, whose
        result is shared by both dense output layers.
        """
        lstm_kwargs = self.hyper_parameters['layer_bi_lstm']
        base = self.embedding.embed_model

        encoder = L.Bidirectional(L.LSTM(**lstm_kwargs), name='layer_bi_lstm')
        encoded = encoder(base.output)

        # Both heads read the same encoded tensor; only their names differ.
        heads = [
            L.Dense(3, activation='sigmoid', name=head_name)(encoded)
            for head_name in ('layer_output_1', 'layer_output_2')
        ]
        self.tf_model = tf.keras.Model(base.inputs, heads)

    # Rewrite your predict function
    def predict(self,
                x_data,
                batch_size=None,
                debug_info=False,
                threshold=0.5):
        """Predict both outputs and binarize them against ``threshold``.

        Args:
            x_data: Raw input samples, passed to
                ``self.embedding.process_x_dataset``.
            batch_size: Forwarded to ``self.tf_model.predict``.
            debug_info: Not read by this implementation.
            threshold: Scores ``>= threshold`` become 1, scores below become 0.

        Returns:
            A ``(output_1, output_2)`` tuple of the two prediction arrays,
            binarized in place.
        """
        features = self.embedding.process_x_dataset(x_data)
        scores = self.tf_model.predict(features, batch_size=batch_size)
        first, second = scores[0], scores[1]
        for head in (first, second):
            # Order matters: the second mask is evaluated after the first write.
            head[head >= threshold] = 1
            head[head < threshold] = 0
        return first, second
Tada, all done! Now build your own model with the customized processor.
from kashgari.embeddings import BareEmbedding
# Use your processor to init the embedding. You can use any embedding layer provided by kashgari here.
processor = MultiOutputProcessor()
embedding = BareEmbedding(processor=processor)
m = MultiOutputModel(embedding=embedding)
# `x` is the sample input defined with the dataset at the top of this page;
# the tuple supplies one label set per model output.
m.build_model(x, (output_1, output_2))
m.fit(x, (output_1, output_2))