#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #nlp #entity-recognition #bert
204-named-entity-recognition: Named Entity Recognition with OpenVINO

```python
# Imports
import time
import json

import numpy as np

import tokens_bert as tokens

from openvino.runtime import Core
from openvino.runtime import Dimension

# Download the model
# directory where the model will be downloaded
base_model_dir = "model"

# desired precision
precision = "FP16-INT8"

# model name as named in Open Model Zoo
model_name = "bert-small-uncased-whole-word-masking-squad-int8-0002"

model_path = f"model/intel/{model_name}/{precision}/{model_name}.xml"
model_weights_path = f"model/intel/{model_name}/{precision}/{model_name}.bin"

download_command = f"omz_downloader " \
                   f"--name {model_name} " \
                   f"--precision {precision} " \
                   f"--output_dir {base_model_dir} " \
                   f"--cache_dir {base_model_dir}"
! $download_command

# Load the model for Entity Extraction with Dynamic Shape
# initialize the inference engine
ie_core = Core()
# read the network and corresponding weights from file
model = ie_core.read_model(model=model_path, weights=model_weights_path)

# make the sequence-length dimension of every input layer dynamic (1 to 384)
for input_layer in model.inputs:
    input_shape = input_layer.partial_shape
    input_shape[1] = Dimension(1, 384)
    model.reshape({input_layer: input_shape})

# compile the model for the CPU
compiled_model = ie_core.compile_model(model=model, device_name="CPU")

# get input names of nodes
input_keys = list(compiled_model.inputs)

# Processing
# path to the vocabulary file
vocab_file_path = "data/vocab.txt"

# create a dictionary with words and their indices
vocab = tokens.load_vocab_file(vocab_file_path)

# define special tokens
cls_token = vocab["[CLS]"]
sep_token = vocab["[SEP]"]

# set a confidence score threshold
confidence_threshold = 0.4

# Preprocessing
# build the model input for one entity/context pair
def prepare_input(entity_tokens, context_tokens):
    input_ids = [cls_token] + entity_tokens + [sep_token] + \
        context_tokens + [sep_token]
    # 1 for any index
    attention_mask = [1] * len(input_ids)
    # 0 for entity tokens, 1 for the context part
    token_type_ids = [0] * (len(entity_tokens) + 2) + \
        [1] * (len(context_tokens) + 1)

    # create the input to feed the model
    input_dict = {
        "input_ids": np.array([input_ids], dtype=np.int32),
        "attention_mask": np.array([attention_mask], dtype=np.int32),
        "token_type_ids": np.array([token_type_ids], dtype=np.int32),
    }

    # some models require additional position_ids
    if "position_ids" in [i_key.any_name for i_key in input_keys]:
        position_ids = np.arange(len(input_ids))
        input_dict["position_ids"] = np.array([position_ids], dtype=np.int32)

    return input_dict

# Postprocessing
def postprocess(output_start, output_end, entity_tokens,
                context_tokens_start_end, input_size):

    def get_score(logits):
        out = np.exp(logits)
        return out / out.sum(axis=-1)

    # get start-end scores for the context
    score_start = get_score(output_start)
    score_end = get_score(output_end)

    # index of the first context token in the tensor
    context_start_idx = len(entity_tokens) + 2
    # index of the last+1 context token in the tensor
    context_end_idx = input_size - 1

    # score all start-end combinations to find the best one
    max_score, max_start, max_end = find_best_entity_window(
        start_score=score_start,
        end_score=score_end,
        context_start_idx=context_start_idx,
        context_end_idx=context_end_idx
    )

    # convert to start-end indices in the context text
    max_start = context_tokens_start_end[max_start][0]
    max_end = context_tokens_start_end[max_end][1]

    return max_score, max_start, max_end

def find_best_entity_window(start_score, end_score,
                            context_start_idx, context_end_idx):
    context_len = context_end_idx - context_start_idx
    score_mat = np.matmul(
        start_score[context_start_idx:context_end_idx].reshape((context_len, 1)),
        end_score[context_start_idx:context_end_idx].reshape((1, context_len)),
    )
    # reset candidates with end before start
    score_mat = np.triu(score_mat)
    # reset long candidates (>16 words)
    score_mat = np.tril(score_mat, 16)
    # find the best start-end pair
    max_s, max_e = divmod(score_mat.flatten().argmax(), score_mat.shape[1])
    max_score = score_mat[max_s, max_e]

    return max_score, max_s, max_e

def get_best_entity(entity, context, vocab):
    # convert the context string to tokens
    context_tokens, context_tokens_end = tokens.text_to_tokens(
        text=context.lower(), vocab=vocab)
    # convert the entity string to tokens
    entity_tokens, _ = tokens.text_to_tokens(text=entity.lower(), vocab=vocab)

    network_input = prepare_input(entity_tokens, context_tokens)
    input_size = len(context_tokens) + len(entity_tokens) + 3

    # OpenVINO inference
    output_start_key = compiled_model.output("output_s")
    output_end_key = compiled_model.output("output_e")
    result = compiled_model(network_input)

    # postprocess the result to get the score and the context range of the answer
    score_start_end = postprocess(output_start=result[output_start_key][0],
                                  output_end=result[output_end_key][0],
                                  entity_tokens=entity_tokens,
                                  context_tokens_start_end=context_tokens_end,
                                  input_size=input_size)

    # return the part of the context that is the answer
    return context[score_start_end[1]:score_start_end[2]], score_start_end[0]

# Set the Entity Recognition Template
template = ["building", "company", "persons", "city",
            "state", "height", "floor", "address"]

def run_analyze_entities(context):
    print(f"Context: {context}\n", flush=True)
    if len(context) == 0:
        print("Error: Empty context or outside paragraphs")
        return

    if len(context) > 380:
        print("Error: The context is too long for this particular model. "
              "Try a context shorter than 380 characters.")
        return

    # measure the processing time
    start_time = time.perf_counter()
    extract = []
    for field in template:
        entity_to_find = field + "?"
        entity, score = get_best_entity(entity=entity_to_find,
                                        context=context,
                                        vocab=vocab)
        if score >= confidence_threshold:
            extract.append({"Entity": entity, "Type": field,
                            "Score": f"{score:.2f}"})
    end_time = time.perf_counter()

    res = {"Extraction": extract, "Time": f"{end_time - start_time:.2f}s"}
    print("\nJSON Output:")
    print(json.dumps(res, sort_keys=False, indent=4))

# Run on Simple Text
# Sample 1
source_text = "Intel Corporation is an American multinational and technology" \
    " company headquartered in Santa Clara, California."
run_analyze_entities(source_text)

# Sample 2
source_text = "Intel was founded in Mountain View, California, " \
    "in 1968 by Gordon E. Moore, a chemist, and Robert Noyce, " \
    "a physicist and co-inventor of the integrated circuit."
run_analyze_entities(source_text)

# Sample 3
source_text = "The Robert Noyce Building in Santa Clara, California, " \
    "is the headquarters for Intel Corporation. It was constructed in 1992 " \
    "and is located at 2200 Mission College Boulevard - 95054. It has an " \
    "estimated height of 22.20 meters and 6 floors above ground."
run_analyze_entities(source_text)
```
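The span selection in `find_best_entity_window` is worth seeing in isolation: the outer product of the start and end probabilities scores every candidate span, `np.triu` discards spans that end before they start, and `np.tril(..., 16)` discards spans longer than 16 tokens. Below is a minimal, self-contained sketch of that logic; the logit values are made up purely for illustration, and the softmax is the numerically stabilized variant rather than the notebook's bare `np.exp`:

```python
import numpy as np

# toy start/end logits for a 6-token context (made-up values)
start_logits = np.array([0.1, 2.0, 0.3, 0.2, 0.1, 0.1])
end_logits = np.array([0.1, 0.2, 0.3, 2.5, 0.1, 0.1])

def softmax(logits):
    out = np.exp(logits - logits.max())
    return out / out.sum()

# outer product: score_mat[s, e] is the joint score of the span s..e
score_mat = np.outer(softmax(start_logits), softmax(end_logits))
# zero out spans that end before they start ...
score_mat = np.triu(score_mat)
# ... and spans longer than 16 tokens
score_mat = np.tril(score_mat, 16)

best_start, best_end = divmod(score_mat.argmax(), score_mat.shape[1])
print(best_start, best_end, score_mat[best_start, best_end])
# prints "1 3 ..." — the best span runs from token 1 to token 3
```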
#python #openvino #openvino-notebooks #deeplearning #accelerated-inference #nlp #question-answering #bert
213-question-answering: Interactive Question Answering with OpenVINO

```python
# Imports
import operator
import time
from urllib import parse

import numpy as np

from openvino.runtime import Core

import html_reader as reader
import tokens_bert as tokens

# Download the model
# directory where the model will be downloaded
base_model_dir = "model"

# desired precision
precision = "FP16-INT8"

# model name as named in Open Model Zoo
model_name = "bert-small-uncased-whole-word-masking-squad-int8-0002"

model_path = f"model/intel/{model_name}/{precision}/{model_name}.xml"
model_weights_path = f"model/intel/{model_name}/{precision}/{model_name}.bin"

download_command = f"omz_downloader " \
                   f"--name {model_name} " \
                   f"--precision {precision} " \
                   f"--output_dir {base_model_dir} " \
                   f"--cache_dir {base_model_dir}"
! $download_command

# Load the model
# initialize the inference engine
core = Core()
# read the network and corresponding weights from file
model = core.read_model(model=model_path, weights=model_weights_path)
# load the model on the CPU (you can use GPU as well)
compiled_model = core.compile_model(model=model, device_name="CPU")

# get input and output names of nodes
input_keys = list(compiled_model.inputs)
output_keys = list(compiled_model.outputs)

# get the network input size
input_size = compiled_model.input(0).shape[1]

# Processing
# path to the vocabulary file
vocab_file_path = "data/vocab.txt"

# create a dictionary with words and their indices
vocab = tokens.load_vocab_file(vocab_file_path)

# define special tokens
cls_token = vocab["[CLS]"]
pad_token = vocab["[PAD]"]
sep_token = vocab["[SEP]"]

# function to load text from the given urls
def load_context(sources):
    input_urls = []
    paragraphs = []
    for source in sources:
        result = parse.urlparse(source)
        if all([result.scheme, result.netloc]):
            input_urls.append(source)
        else:
            paragraphs.append(source)

    paragraphs.extend(reader.get_paragraphs(input_urls))
    # produce one big context string
    return "\n".join(paragraphs)

# Preprocessing
# generator of a sequence of inputs
def prepare_input(question_tokens, context_tokens):
    # length of the question in tokens
    question_len = len(question_tokens)
    # context part size
    context_len = input_size - question_len - 3

    if context_len < 16:
        raise RuntimeError("Question is too long in comparison to input size. "
                           "No space for context")

    # take parts of the context, overlapping by 50%
    for start in range(0, max(1, len(context_tokens) - context_len),
                       context_len // 2):
        # part of the context
        part_context_tokens = context_tokens[start:start + context_len]
        # input: question and context separated by special tokens
        input_ids = [cls_token] + question_tokens + [sep_token] + \
            part_context_tokens + [sep_token]
        # 1 for any index if there is no padding token, 0 otherwise
        attention_mask = [1] * len(input_ids)
        # 0 for question tokens, 1 for the context part
        token_type_ids = [0] * (question_len + 2) + \
            [1] * (len(part_context_tokens) + 1)

        # add padding at the end
        (input_ids, attention_mask, token_type_ids), pad_number = pad(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids)

        # create the input to feed the model
        input_dict = {
            "input_ids": np.array([input_ids], dtype=np.int32),
            "attention_mask": np.array([attention_mask], dtype=np.int32),
            "token_type_ids": np.array([token_type_ids], dtype=np.int32),
        }

        # some models require additional position_ids
        if "position_ids" in [i_key.any_name for i_key in input_keys]:
            position_ids = np.arange(len(input_ids))
            input_dict["position_ids"] = np.array([position_ids], dtype=np.int32)

        yield input_dict, pad_number, start

# function to add padding
def pad(input_ids, attention_mask, token_type_ids):
    # how many padding tokens are needed
    diff_input_size = input_size - len(input_ids)

    if diff_input_size > 0:
        # add padding to all the inputs
        input_ids = input_ids + [pad_token] * diff_input_size
        attention_mask = attention_mask + [0] * diff_input_size
        token_type_ids = token_type_ids + [0] * diff_input_size

    return (input_ids, attention_mask, token_type_ids), diff_input_size

# Postprocessing
# based on https://github.com/openvinotoolkit/open_model_zoo/blob/bf03f505a650bafe8da03d2747a8b55c5cb2ef16/demos/common/python/openvino/model_zoo/model_api/models/bert.py#L163
def postprocess(output_start, output_end, question_tokens,
                context_tokens_start_end, padding, start_idx):

    def get_score(logits):
        out = np.exp(logits)
        return out / out.sum(axis=-1)

    # get start-end scores for the context
    score_start = get_score(output_start)
    score_end = get_score(output_end)

    # index of the first context token in the tensor
    context_start_idx = len(question_tokens) + 2
    # index of the last+1 context token in the tensor
    context_end_idx = input_size - padding - 1

    # score all start-end combinations to find the best one
    max_score, max_start, max_end = find_best_answer_window(
        start_score=score_start,
        end_score=score_end,
        context_start_idx=context_start_idx,
        context_end_idx=context_end_idx)

    # convert to start-end indices in the context text
    max_start = context_tokens_start_end[max_start + start_idx][0]
    max_end = context_tokens_start_end[max_end + start_idx][1]

    return max_score, max_start, max_end

# based on https://github.com/openvinotoolkit/open_model_zoo/blob/bf03f505a650bafe8da03d2747a8b55c5cb2ef16/demos/common/python/openvino/model_zoo/model_api/models/bert.py#L188
def find_best_answer_window(start_score, end_score,
                            context_start_idx, context_end_idx):
    context_len = context_end_idx - context_start_idx
    score_mat = np.matmul(
        start_score[context_start_idx:context_end_idx].reshape((context_len, 1)),
        end_score[context_start_idx:context_end_idx].reshape((1, context_len)),
    )
    # reset candidates with end before start
    score_mat = np.triu(score_mat)
    # reset long candidates (>16 words)
    score_mat = np.tril(score_mat, 16)
    # find the best start-end pair
    max_s, max_e = divmod(score_mat.flatten().argmax(), score_mat.shape[1])
    max_score = score_mat[max_s, max_e]

    return max_score, max_s, max_e

def get_best_answer(question, context):
    # convert the context string to tokens
    context_tokens, context_tokens_start_end = tokens.text_to_tokens(
        text=context.lower(), vocab=vocab)
    # convert the question string to tokens
    question_tokens, _ = tokens.text_to_tokens(text=question.lower(), vocab=vocab)

    results = []
    # iterate through the different parts of the context
    for network_input, padding, start_idx in prepare_input(
            question_tokens=question_tokens, context_tokens=context_tokens):
        # get output layers
        output_start_key = compiled_model.output("output_s")
        output_end_key = compiled_model.output("output_e")

        # OpenVINO inference
        result = compiled_model(network_input)
        # postprocess the result to get the score and the context range of the answer
        score_start_end = postprocess(output_start=result[output_start_key][0],
                                      output_end=result[output_end_key][0],
                                      question_tokens=question_tokens,
                                      context_tokens_start_end=context_tokens_start_end,
                                      padding=padding,
                                      start_idx=start_idx)
        results.append(score_start_end)

    # find the highest score
    answer = max(results, key=operator.itemgetter(0))
    # return the part of the context that is the answer
    return context[answer[1]:answer[2]], answer[0]

# Main Processing Function
def run_question_answering(sources):
    print(f"Context: {sources}", flush=True)
    context = load_context(sources)

    if len(context) == 0:
        print("Error: Empty context or outside paragraphs")
        return

    while True:
        question = input()
        # if no question - break
        if question == "":
            break

        # measure the processing time
        start_time = time.perf_counter()
        answer, score = get_best_answer(question=question, context=context)
        end_time = time.perf_counter()

        print(f"Question: {question}")
        print(f"Answer: {answer}")
        print(f"Score: {score:.2f}")
        print(f"Time: {end_time - start_time:.2f}s")

# Run on local paragraphs
sources = ["Computational complexity theory is a branch of the theory of computation in theoretical computer "
           "science that focuses on classifying computational problems according to their inherent difficulty, "
           "and relating those classes to each other. A computational problem is understood to be a task that "
           "is in principle amenable to being solved by a computer, which is equivalent to stating that the "
           "problem may be solved by mechanical application of mathematical steps, such as an algorithm."]
run_question_answering(sources)

# Run on websites
sources = ["https://en.wikipedia.org/wiki/OpenVINO"]
run_question_answering(sources)
```
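Independent of OpenVINO, the input packing performed by `prepare_input` and `pad` can be sketched on its own: question and context are concatenated with `[CLS]`/`[SEP]` markers, `token_type_ids` distinguishes the two segments, and everything is padded to the fixed network input size with the attention mask zeroed over the padding. The token ids and the input size below are made up for illustration; in the notebook they come from `tokens_bert` and the downloaded `vocab.txt`:

```python
# placeholder special-token ids, not the real vocab indices
cls_token, sep_token, pad_token = 101, 102, 0
input_size = 16                            # pretend the network expects 16 tokens

question_tokens = [2054, 2003]             # e.g. "what is" (made-up ids)
context_tokens = [1996, 3007, 1997, 2605]  # e.g. "the capital of france"

input_ids = [cls_token] + question_tokens + [sep_token] + \
    context_tokens + [sep_token]
attention_mask = [1] * len(input_ids)                  # 1 = real token
token_type_ids = [0] * (len(question_tokens) + 2) + \
    [1] * (len(context_tokens) + 1)                    # 0 = question, 1 = context

# pad everything up to the fixed network input size
pad_number = input_size - len(input_ids)
input_ids += [pad_token] * pad_number
attention_mask += [0] * pad_number                     # mask out the padding
token_type_ids += [0] * pad_number

print(input_ids)
print(attention_mask)
print(token_type_ids)
```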
Source: https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/204-named-entity-recognition/204-named-entity-recognition.ipynb (saved Sat Jun 18 2022 21:07:22 GMT+0000)
Source: https://github.com/openvinotoolkit/openvino_notebooks/blob/main/notebooks/213-question-answering/213-question-answering.ipynb (saved Fri Jun 17 2022 05:03:05 GMT+0000)