Snippets Collections
# Install the latest release of Haystack in your own environment
#! pip install farm-haystack

# Install the latest main of Haystack
# NOTE(review): the git repository URL below was stripped by the snippet
# scraper — "[colab]" is only the extras tag. Restore the full
# "git+https://...#egg=farm-haystack[colab]" URL before running.
!pip install --upgrade pip
!pip install git+[colab]

# Imports needed to run this notebook

from pprint import pprint
from tqdm import tqdm
from haystack.nodes import QuestionGenerator, BM25Retriever, FARMReader
from haystack.document_stores import ElasticsearchDocumentStore
# NOTE(review): the import list below was truncated by the snippet scraper —
# it is missing the pipeline names and the closing ")". Judging from later
# usage it presumably imported QuestionAnswerGenerationPipeline (and friends);
# confirm against the original tutorial.
from haystack.pipelines import (
from haystack.utils import launch_es, print_questions
 # Option 2: In Colab / No Docker environments: Start Elasticsearch from source
# NOTE(review): the wget URL was stripped by the scraper — it presumably
# pointed at the elasticsearch-7.9.2-linux-x86_64.tar.gz release archive.
! wget -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

import os
from subprocess import Popen, PIPE, STDOUT

# Launch Elasticsearch as uid 1 ("daemon", chown'd above) instead of root.
# NOTE(review): the Popen(...) call is missing its closing ")" — truncated
# by the scraper.
es_server = Popen(
    ["elasticsearch-7.9.2/bin/elasticsearch"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1)  # as daemon
# wait until ES has started
! sleep 30

# Install Xpdf's pdftotext binary (used by haystack's PDFToTextConverter).
!tar -xvf xpdf-tools-linux-4.04.tar.gz && sudo cp xpdf-tools-linux-4.04/bin64/pdftotext /usr/local/bin

from haystack.nodes import TextConverter, PDFToTextConverter, DocxToTextConverter, PreProcessor

# Convert a Dutch ("nl") text file into a haystack Document;
# remove_numeric_tables=True asks the converter to strip numeric table rows.
converter = TextConverter(remove_numeric_tables=True, valid_languages=["nl"])
doc_txt = converter.convert(file_path="/content/data/Chatbot_BVO DDK_22092022.txt", meta=None)[0]

from haystack.nodes import PreProcessor

# This is a default usage of the PreProcessor.
# Here, it performs cleaning of consecutive whitespaces
# and splits a single large document into smaller documents.
# Each document is up to 1000 words long and document breaks cannot fall in the middle of sentences
# Note how the single document passed into the document gets split into 5 smaller documents

# NOTE(review): the PreProcessor(...) argument list was truncated by the
# snippet scraper — the call below is missing its keyword arguments
# (clean_whitespace, split_length=1000, etc.) and the closing ")".
preprocessor = PreProcessor(
docs = preprocessor.process([doc_txt])
print(f"n_docs_input: 1\nn_docs_output: {len(docs)}")

# Initialize Question Generator
question_generator = QuestionGenerator()

# Fill the document store with a German document.
# NOTE(review): the comment above says "German", but the translation models
# below are Dutch (nl) <-> English — presumably the document is Dutch; confirm.

# Load machine translation models
from haystack.nodes import TransformersTranslator
# nl -> en for pipeline input, en -> nl for pipeline output.
in_translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-nl-en")
out_translator = TransformersTranslator(model_name_or_path="Helsinki-NLP/opus-mt-en-nl")

# NOTE(review): QuestionAnswerGenerationPipeline is not visibly imported —
# the truncated "from haystack.pipelines import (" line earlier presumably
# provided it.
reader = FARMReader("deepset/roberta-base-squad2")
qag_pipeline = QuestionAnswerGenerationPipeline(question_generator, reader)

# Wrap the previously defined QuestionAnswerGenerationPipeline
from haystack.pipelines import TranslationWrapperPipeline

# NOTE(review): this call is missing its closing ")" — truncated by the scraper.
pipeline_with_translation = TranslationWrapperPipeline(
    input_translator=in_translator, output_translator=out_translator, pipeline=qag_pipeline

for idx, document in enumerate(tqdm(document_store)):
    print(f"\n * Generating questions and answers for document {idx}: {document.content[:100]}...\n")
    # NOTE(review): the call target before "=" was stripped — presumably
    # "result =[document])"; confirm against the
    # original Haystack tutorial. document_store is defined in an unseen cell.
    result =[document])
// Play/pause toggle for ".audio-button" elements, each wrapping an <audio>
// tag. NOTE(review): this snippet was truncated by the scraper — the body of
// the first .each() (the "Add file names" part), the / .pause()
// calls, and several closing "});" / "}" lines are missing.
$(document).ready(function() {
  var playing = false;  // shared flag: tracks whether audio is currently playing

  // Add file names.
  $('.audio-button').each(function() {
    var $button = $(this);    
    var $audio = $button.find('audio');

  // Add click listener.
  $('.audio-button').click(function() {
    var $button = $(this);    
    var audio = $button.find('audio')[0]; // <-- Interact with this!
    // Toggle play/pause
    // NOTE(review): the stripped statement here was presumably "".
    if (playing !== true) {;
    } else {

    // Flip state
    playing = !playing

/* NOTE(review): the selectors were stripped from this CSS by the snippet
   scraper — each "{" below originally followed a selector (presumably
   ".audio-button" and ".audio-button:before"; confirm against the original
   snippet). "\f04b" is a Font Awesome glyph code used as generated content. */
// CSS {
  position:      relative;
  display:       block;
  width:         12em;
  height:        2.25em;
  margin-bottom: 0.125em;
  text-align:    left;
} {
  position:      absolute;
  right:         0.8em;
  content:       "\f04b";
} {

<!-- NOTE(review): the href/src URLs below were stripped by the snippet
     scraper (presumably a Font Awesome stylesheet and the jQuery CDN), and
     each <button> is missing its label text and closing </button> tag. -->
<link href="" rel="stylesheet"/>
<script src=""></script>

<button class="fa audio-button">
  <audio src="media/test.mp3"></audio>
<button class="fa audio-button">
  <audio src="media/test2.mp3"></audio>
<button class="fa audio-button">
  <audio src="media/test3.mp3"></audio>
<button class="fa audio-button">
  <audio src="media/test4.mp3"></audio>
# Add the tables to the DocumentStore

import json
from haystack import Document
import pandas as pd

def read_tables(filename):
    # Read a JSON file of tables shaped {key: {"header": [...], "data": [...]}}
    # and convert each entry into a haystack table Document whose content is a
    # pandas DataFrame built from the table's header/rows.
    #
    # filename: path to the tables JSON file.
    # Returns: list of Document objects, one per table in the file.
    processed_tables = []
    with open(filename) as f:  # renamed from "tables" — was shadowed by json.load result
        tables = json.load(f)
        for key, table in tables.items():
            current_columns = table["header"]
            current_rows = table["data"]
            current_df = pd.DataFrame(columns=current_columns, data=current_rows)
            document = Document(content=current_df, content_type="table", id=key)
            # BUG FIX: the original built each Document but never appended it,
            # so read_tables always returned an empty list and nothing was
            # ever written to the document store.
            processed_tables.append(document)

    return processed_tables

# NOTE(review): doc_dir, document_store and document_index are defined in
# earlier (unseen) cells of the original tutorial notebook.
tables = read_tables(f"{doc_dir}/tables.json")
document_store.write_documents(tables, index=document_index)

# Showing content field and meta field of one of the Documents of content_type 'table'
# NOTE(review): the comment above belongs to a stripped snippet; the code
# below is a separate snippet that downloads and prepares YouTube audio.
from IPython.display import YouTubeVideo

YOUTUBE_ID = 'xxxxxxxxxxxx'  # placeholder — replace with a real YouTube video id

!rm -rf *.wav
# NOTE(review): the URL before "\?v\=" was stripped by the scraper —
# presumably "". project_name is
# defined in an unseen cell.
!youtube-dl --extract-audio --audio-format wav --output "downloaded.%(ext)s"\?v\={YOUTUBE_ID}
# Re-encode to 16 kHz mono 16-bit PCM.
!ffmpeg -loglevel panic -y -i downloaded.wav -acodec pcm_s16le -ac 1 -ar 16000 {project_name}/test.wav
# Audio augmentation over every .wav: mix in traffic noise, apply a 1.2 kHz
# high-pass ("phone" effect), and produce a 16 kHz mono copy.
find . -name '*.wav' -exec ffmpeg -i '{}' -i city-traffic-outdoor.mp3 -filter_complex "[0:a][1:a]amerge=inputs=2[a]" -map "[a]" -ar 16000 -ac 1 '{}.city_traffic_outdoor.wav' \;
find . -name '*.wav' -exec ffmpeg -i '{}' -filter:a "highpass=f=1200" '{}.phone.wav' \;
find . -name '*.wav' -exec ffmpeg -i '{}' -ar 16000 -ac 1 '{}.mono.wav' \;
//MARK: - Document Picker

// From your project's capabilities, enable both the iCloud and the Key-Sharing.

// NOTE(review): this snippet was truncated by the scraper — the body of
// pickDocumentPressed, several closing braces, and some UTType entries in
// getSupportedTypes (the ",," runs) are missing.
@IBAction func pickDocumentPressed(_ sender: Any) {

extension OffersReceivedViewController : UIDocumentPickerDelegate,UINavigationControllerDelegate {
    // Configure and present a single-selection document picker restricted to
    // the types returned by getSupportedTypes().
    func presentDocumentPicker() {
        let sTypes = getSupportedTypes()
        let documentPickerController = UIDocumentPickerViewController(
            forOpeningContentTypes: sTypes)
        documentPickerController.delegate = self
        documentPickerController.allowsMultipleSelection = false
        self.present(documentPickerController, animated: true) {
    // Delegate callback: stores the URL of the first picked document in
    // fileUrl (a property declared elsewhere in the class).
    public func documentPicker(_ controller: UIDocumentPickerViewController, didPickDocumentsAt urls: [URL]) {
        guard let myURL = urls.first else {
        print("import result : \(myURL)")
        print("get Image Url")
        fileUrl = myURL
    // Delegate callback: user dismissed the picker without picking a file.
    func documentPickerWasCancelled(_ controller: UIDocumentPickerViewController) {
        print("view was cancelled")
        dismiss(animated: true, completion: nil)
    // The UTTypes the picker accepts (text, image, audio and video formats).
    func getSupportedTypes() -> [UTType] {
        let supportedTypes : [UTType] = [UTType.utf8TabSeparatedText, UTType.rtf,    UTType.pdf, UTType.webArchive, UTType.image, UTType.jpeg,    UTType.tiff, UTType.gif, UTType.png, UTType.bmp, UTType.ico,    UTType.rawImage, UTType.svg, UTType.livePhoto,,,, UTType.quickTimeMovie, UTType.mpeg,    UTType.mpeg2Video, UTType.mpeg2TransportStream, UTType.mp3,    UTType.mpeg4Movie, UTType.mpeg4Audio, UTType.avi, UTType.aiff,    UTType.wav, UTType.midi, UTType.archive, UTType.gzip, UTType.bz2,, UTType.appleArchive, UTType.spreadsheet, UTType.epub]
        return supportedTypes
Mon Oct 17 2022 11:58:22 GMT+0000 (UTC)

#javascript #audio #player #jquery

Mon Jun 27 2022 10:53:43 GMT+0000 (UTC)

#ios #swift #file #pdf #doc #document #picker #mp3 #mp4 #video #audio

Save snippets that work with our extensions

Available in the Chrome Web Store Get Firefox Add-on Get VS Code extension