{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# QuickStart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "nbsphinx": "hidden"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The autoreload extension is already loaded. To reload it, use:\n",
      "  %reload_ext autoreload\n"
     ]
    }
   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "nbsphinx": "hidden",
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "import IPython.display as ipd\n",
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "InteractiveShell.ast_node_interactivity = \"all\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build Image Search In 5 Minutes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection_name = 'pokemon_images'\n",
    "\n",
    "documents = []\n",
    "for i in range(1, 20):\n",
    "    documents.append({\n",
    "        'image': 'https://assets.pokemon.com/assets/cms2/img/pokedex/full/{}.png'.format(f'{i:03}'),\n",
    "        'pokemon_id' : str(i),\n",
    "        '_id': i\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'status': 'complete', 'message': 'pokemon_images deleted'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#1. specify the vdb client\n",
    "from vectorai.client import ViClient\n",
    "vi_client = ViClient(username, api_key, url)\n",
    "vi_client.delete_collection(collection_name)\n",
    "\n",
    "#2. specify an image encoder\n",
    "from vectorai.models.deployed import ViImage2Vec\n",
    "image_encoder = ViImage2Vec(username, api_key, url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "82cb6f5ed0d5415eac27727b6ee1bba8",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=1), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'inserted_successfully': 19, 'failed': 0, 'failed_document_ids': []}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#3. insert the documents and encode images simultaneously\n",
    "# using jobs means that the encoding process takes place on our servers as opposed to your computer\n",
    "use_jobs = False\n",
    "\n",
    "if use_jobs:\n",
    "    vi_client.insert_documents(collection_name, documents)\n",
    "    job = vi_client.encode_image_job(collection_name, 'image')\n",
    "    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n",
    "else:\n",
    "    vi_client.insert_documents(collection_name, documents, models={'image':image_encoder.encode})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>image</th>\n",
       "      <th>pokemon_id</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png\" width=\"150\" ></td>\n",
       "      <td>3</td>\n",
       "      <td>2020-10-02T07:07:18.799559</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/002.png\" width=\"150\" ></td>\n",
       "      <td>2</td>\n",
       "      <td>2020-10-02T07:07:18.797693</td>\n",
       "      <td>0.920337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/001.png\" width=\"150\" ></td>\n",
       "      <td>1</td>\n",
       "      <td>2020-10-02T07:07:18.795655</td>\n",
       "      <td>0.838996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>17</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/017.png\" width=\"150\" ></td>\n",
       "      <td>17</td>\n",
       "      <td>2020-10-02T07:07:30.587841</td>\n",
       "      <td>0.835111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>16</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/016.png\" width=\"150\" ></td>\n",
       "      <td>16</td>\n",
       "      <td>2020-10-02T07:07:30.585399</td>\n",
       "      <td>0.813012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#4. search\n",
    "search_results = vi_client.search(collection_name,\n",
    "    image_encoder.encode('https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png'), \n",
    "    'image_vector_', page_size=5)\n",
    "\n",
    "#4.2 first result is the query audio itself\n",
    "vi_client.show_json(search_results, image_fields=['image'], image_width=150)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>image</th>\n",
       "      <th>pokemon_id</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/002.png\" width=\"150\" ></td>\n",
       "      <td>2</td>\n",
       "      <td>2020-10-02T07:07:18.797693</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/003.png\" width=\"150\" ></td>\n",
       "      <td>3</td>\n",
       "      <td>2020-10-02T07:07:18.799559</td>\n",
       "      <td>0.920337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/001.png\" width=\"150\" ></td>\n",
       "      <td>1</td>\n",
       "      <td>2020-10-02T07:07:18.795655</td>\n",
       "      <td>0.895991</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/007.png\" width=\"150\" ></td>\n",
       "      <td>7</td>\n",
       "      <td>2020-10-02T07:07:18.807199</td>\n",
       "      <td>0.839945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17</td>\n",
       "      <td><img src=\"https://assets.pokemon.com/assets/cms2/img/pokedex/full/017.png\" width=\"150\" ></td>\n",
       "      <td>17</td>\n",
       "      <td>2020-10-02T07:07:30.587841</td>\n",
       "      <td>0.833277</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#5 recommendation by id\n",
    "search_by_id_results = vi_client.search_by_id(collection_name, '2', 'image_vector_', page_size=5)\n",
    "\n",
    "#5.2 first result is the id's audio itself\n",
    "vi_client.show_json(search_by_id_results, image_fields=['image'], image_width=150)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build Audio Search in 5 Minutes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Building Audio search is easy with Vi!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection_name = 'audio_quickstart'\n",
    "\n",
    "#create the documents\n",
    "documents = []\n",
    "for i in range(1, 1001):\n",
    "    documents.append({\n",
    "        'audio': 'https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_{}.wav'.format(i),\n",
    "        'name' : 'common_voice_en_{}.wav'.format(i),\n",
    "        '_id': i\n",
    "    })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Logged in. Welcome public-demo. To view list of available collections, call list_collections() method.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'status': 'complete', 'message': 'audio_quickstart deleted'}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#1. specify the vdb client\n",
    "from vectorai.client import ViClient\n",
    "vi_client = ViClient(username, api_key, url)\n",
    "vi_client.delete_collection(collection_name)\n",
    "\n",
    "#2. specify an audio encoder\n",
    "from vectorai.models.deployed import ViAudio2Vec\n",
    "audio_encoder = ViAudio2Vec(username, api_key, url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\kda\\vectorai\\vectorai\\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\n",
      "  \"Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\"\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f1b5d6b126b4459facb92dadabb1764f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=66), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'inserted_successfully': 1000, 'failed': 0, 'failed_document_ids': []}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'status': 'Finished'}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Done'"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#3. insert the documents and encode audio simultaneously\n",
    "use_jobs = True\n",
    "\n",
    "if use_jobs:\n",
    "    vi_client.insert_documents(collection_name, documents)\n",
    "    job = vi_client.encode_audio_job(collection_name, 'audio')\n",
    "    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n",
    "else:\n",
    "    vi_client.insert_documents(collection_name, documents, models={'audio':audio_encoder.encode})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>name</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>audio</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>common_voice_en_1.wav</td>\n",
       "      <td>2020-10-02T07:07:34.725378</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_1.wav' type='audio/wav'></audio></td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>12</td>\n",
       "      <td>common_voice_en_12.wav</td>\n",
       "      <td>2020-10-02T07:07:34.726100</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_12.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.893219</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>32</td>\n",
       "      <td>common_voice_en_32.wav</td>\n",
       "      <td>2020-10-02T07:07:35.271638</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_32.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.891373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20</td>\n",
       "      <td>common_voice_en_20.wav</td>\n",
       "      <td>2020-10-02T07:07:35.046128</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_20.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.882336</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>15</td>\n",
       "      <td>common_voice_en_15.wav</td>\n",
       "      <td>2020-10-02T07:07:34.726251</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_15.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.877323</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import IPython.display as ipd\n",
    "#4. search\n",
    "search_results = vi_client.search(collection_name, audio_encoder.encode(documents[0]['audio']), \n",
    "    'audio_vector_', page_size=5)\n",
    "\n",
    "vi_client.show_json(search_results, audio_fields=['audio'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>name</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>audio</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>common_voice_en_2.wav</td>\n",
       "      <td>2020-10-02T07:07:34.725536</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_2.wav' type='audio/wav'></audio></td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40</td>\n",
       "      <td>common_voice_en_40.wav</td>\n",
       "      <td>2020-10-02T07:07:35.272603</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_40.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.884632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>common_voice_en_3.wav</td>\n",
       "      <td>2020-10-02T07:07:34.725629</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_3.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.879187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>14</td>\n",
       "      <td>common_voice_en_14.wav</td>\n",
       "      <td>2020-10-02T07:07:34.726200</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_14.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.874556</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>21</td>\n",
       "      <td>common_voice_en_21.wav</td>\n",
       "      <td>2020-10-02T07:07:35.046224</td>\n",
       "      <td><audio controls><source src='https://vecsearch-bucket.s3.us-east-2.amazonaws.com/voices/common_voice_en_21.wav' type='audio/wav'></audio></td>\n",
       "      <td>0.865409</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#5 recommendation by id\n",
    "search_by_id_results = vi_client.search_by_id(collection_name, '2', 'audio_vector_', page_size=5)\n",
    "\n",
    "vi_client.show_json(search_by_id_results, audio_fields=['audio'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build Text QA Search in 5 minutes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: datasets in c:\\users\\jacky\\anaconda3\\lib\\site-packages (1.0.1)\n",
      "Requirement already satisfied: filelock in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (3.0.12)\n",
      "Requirement already satisfied: dill in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (0.3.1.1)\n",
      "Requirement already satisfied: pyarrow>=0.17.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.19.0 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (2.22.0)\n",
      "Requirement already satisfied: numpy>=1.17 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (1.19.1)\n",
      "Requirement already satisfied: pandas in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (0.25.2)\n",
      "Requirement already satisfied: tqdm>=4.27 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (4.36.1)\n",
      "Requirement already satisfied: xxhash in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from datasets) (2.0.0)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (2020.6.20)\n",
      "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (1.24.2)\n",
      "Requirement already satisfied: idna<2.9,>=2.5 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (2.8)\n",
      "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from requests>=2.19.0->datasets) (3.0.4)\n",
      "Requirement already satisfied: pytz>=2017.2 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from pandas->datasets) (2019.3)\n",
      "Requirement already satisfied: python-dateutil>=2.6.1 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from pandas->datasets) (2.8.0)\n",
      "Requirement already satisfied: six>=1.5 in c:\\users\\jacky\\anaconda3\\lib\\site-packages (from python-dateutil>=2.6.1->pandas->datasets) (1.12.0)\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "%pip install datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "collection_name = 'squad'\n",
    "\n",
    "#use huggingface's datasets library to download squad\n",
    "import datasets\n",
    "squad_dataset = datasets.load_dataset('squad')\n",
    "documents = [{'_id':str(n), **d} for n, d in enumerate(squad_dataset['validation'])]\n",
    "vi_client.delete_collection(collection_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#1. specify the vdb client\n",
    "from vectorai.client import ViClient\n",
    "vi_client = ViClient(username, api_key, url)\n",
    "vi_client.delete_collection(collection_name)\n",
    "\n",
    "#2. specify a text encoder\n",
    "from vectorai.models.deployed import ViText2Vec\n",
    "text_encoder = ViText2Vec(username, api_key, 'https://api.vctr.ai')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "d:\\kda\\vectorai\\vectorai\\read.py:351: UserWarning: Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\n",
      "  \"Potential issue. Cannot find a vector field. Check that the vector field is _vector_.\"\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "cbe3f07f6cdb4be78030b4b89440a277",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=704), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "{'inserted_successfully': 10570, 'failed': 0, 'failed_document_ids': []}"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'status': 'Finished'}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Done'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#3. insert the documents and encode text simultaneously\n",
    "use_jobs = True\n",
    "\n",
    "if use_jobs:\n",
    "    vi_client.insert_documents(collection_name, documents)\n",
    "    job = vi_client.encode_text_job(collection_name, 'question')\n",
    "    vi_client.wait_till_jobs_complete(collection_name, job['job_id'], job['job_name'])\n",
    "else:\n",
    "    vi_client.insert_documents(collection_name, documents, models={'question':text_encoder}, use_bulk_encode=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>question</th>\n",
       "      <th>answers</th>\n",
       "      <th>context</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>id</th>\n",
       "      <th>title</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>11</td>\n",
       "      <td>Who won Super Bowl 50?</td>\n",
       "      <td>{'answer_start': [177, 177, 177], 'text': ['De...</td>\n",
       "      <td>Super Bowl 50 was an American football game to...</td>\n",
       "      <td>2020-10-02T07:47:06.947313</td>\n",
       "      <td>56beace93aeaaa14008c91df</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.798744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>24</td>\n",
       "      <td>Who won Super Bowl 50?</td>\n",
       "      <td>{'answer_start': [177, 177, 177], 'text': ['De...</td>\n",
       "      <td>Super Bowl 50 was an American football game to...</td>\n",
       "      <td>2020-10-02T07:47:07.285182</td>\n",
       "      <td>56d20362e7d4791d009025eb</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.798744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Which NFL team won Super Bowl 50?</td>\n",
       "      <td>{'answer_start': [177, 177, 177], 'text': ['De...</td>\n",
       "      <td>Super Bowl 50 was an American football game to...</td>\n",
       "      <td>2020-10-02T07:47:06.946694</td>\n",
       "      <td>56be4db0acb8001400a502ef</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.763209</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>55</td>\n",
       "      <td>Who was the Super Bowl 50 MVP?</td>\n",
       "      <td>{'answer_start': [248, 248, 252], 'text': ['Vo...</td>\n",
       "      <td>The Broncos took an early lead in Super Bowl 5...</td>\n",
       "      <td>2020-10-02T07:47:07.759154</td>\n",
       "      <td>56be4eafacb8001400a50302</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.754090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>26</td>\n",
       "      <td>Which team won Super Bowl 50.</td>\n",
       "      <td>{'answer_start': [177, 177, 177], 'text': ['De...</td>\n",
       "      <td>Super Bowl 50 was an American football game to...</td>\n",
       "      <td>2020-10-02T07:47:07.285403</td>\n",
       "      <td>56d600e31c85041400946eb0</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.742759</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  _id                           question  \\\n",
       "0  11             Who won Super Bowl 50?   \n",
       "1  24             Who won Super Bowl 50?   \n",
       "2   3  Which NFL team won Super Bowl 50?   \n",
       "3  55     Who was the Super Bowl 50 MVP?   \n",
       "4  26      Which team won Super Bowl 50.   \n",
       "\n",
       "                                             answers  \\\n",
       "0  {'answer_start': [177, 177, 177], 'text': ['De...   \n",
       "1  {'answer_start': [177, 177, 177], 'text': ['De...   \n",
       "2  {'answer_start': [177, 177, 177], 'text': ['De...   \n",
       "3  {'answer_start': [248, 248, 252], 'text': ['Vo...   \n",
       "4  {'answer_start': [177, 177, 177], 'text': ['De...   \n",
       "\n",
       "                                             context  \\\n",
       "0  Super Bowl 50 was an American football game to...   \n",
       "1  Super Bowl 50 was an American football game to...   \n",
       "2  Super Bowl 50 was an American football game to...   \n",
       "3  The Broncos took an early lead in Super Bowl 5...   \n",
       "4  Super Bowl 50 was an American football game to...   \n",
       "\n",
       "                 insert_date_                        id          title  \\\n",
       "0  2020-10-02T07:47:06.947313  56beace93aeaaa14008c91df  Super_Bowl_50   \n",
       "1  2020-10-02T07:47:07.285182  56d20362e7d4791d009025eb  Super_Bowl_50   \n",
       "2  2020-10-02T07:47:06.946694  56be4db0acb8001400a502ef  Super_Bowl_50   \n",
       "3  2020-10-02T07:47:07.759154  56be4eafacb8001400a50302  Super_Bowl_50   \n",
       "4  2020-10-02T07:47:07.285403  56d600e31c85041400946eb0  Super_Bowl_50   \n",
       "\n",
       "   _search_score  \n",
       "0       0.798744  \n",
       "1       0.798744  \n",
       "2       0.763209  \n",
       "3       0.754090  \n",
       "4       0.742759  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#4. search\n",
    "search_results = vi_client.search(collection_name, \n",
    "                                   text_encoder.encode('who was the winner for nfl fifty'), \n",
    "                                   'question_vector_', page_size=5)\n",
    "\n",
    "#4.2 first result is the query text itself\n",
    "vi_client.results_to_df(search_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>question</th>\n",
       "      <th>answers</th>\n",
       "      <th>context</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>id</th>\n",
       "      <th>title</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>50</td>\n",
       "      <td>Who did Denver beat in the 2015 AFC Championsh...</td>\n",
       "      <td>{'answer_start': [372, 368, 372], 'text': ['Ne...</td>\n",
       "      <td>The Panthers finished the regular season with ...</td>\n",
       "      <td>2020-10-02T07:47:07.758767</td>\n",
       "      <td>56d6017d1c85041400946ec1</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>48</td>\n",
       "      <td>Who did Denver beat in the AFC championship?</td>\n",
       "      <td>{'answer_start': [372, 368, 372], 'text': ['Ne...</td>\n",
       "      <td>The Panthers finished the regular season with ...</td>\n",
       "      <td>2020-10-02T07:47:07.758541</td>\n",
       "      <td>56d2045de7d4791d009025f6</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.960072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>331</td>\n",
       "      <td>Who did the Broncos beat to win their division...</td>\n",
       "      <td>{'answer_start': [25, 25, 36], 'text': ['Pitts...</td>\n",
       "      <td>The Broncos defeated the Pittsburgh Steelers i...</td>\n",
       "      <td>2020-10-02T07:47:12.209038</td>\n",
       "      <td>56d99f99dc89441400fdb628</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.923735</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>330</td>\n",
       "      <td>Who did the Broncos defeat in the AFC Champion...</td>\n",
       "      <td>{'answer_start': [192, 192, 204], 'text': ['Ne...</td>\n",
       "      <td>The Broncos defeated the Pittsburgh Steelers i...</td>\n",
       "      <td>2020-10-02T07:47:12.208876</td>\n",
       "      <td>56d7018a0d65d214001982c5</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.915792</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>328</td>\n",
       "      <td>Who did the Broncos beat in the divisional game?</td>\n",
       "      <td>{'answer_start': [25, 21, 36], 'text': ['Pitts...</td>\n",
       "      <td>The Broncos defeated the Pittsburgh Steelers i...</td>\n",
       "      <td>2020-10-02T07:47:11.956089</td>\n",
       "      <td>56d7018a0d65d214001982c2</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.906187</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _id                                           question  \\\n",
       "0   50  Who did Denver beat in the 2015 AFC Championsh...   \n",
       "1   48       Who did Denver beat in the AFC championship?   \n",
       "2  331  Who did the Broncos beat to win their division...   \n",
       "3  330  Who did the Broncos defeat in the AFC Champion...   \n",
       "4  328   Who did the Broncos beat in the divisional game?   \n",
       "\n",
       "                                             answers  \\\n",
       "0  {'answer_start': [372, 368, 372], 'text': ['Ne...   \n",
       "1  {'answer_start': [372, 368, 372], 'text': ['Ne...   \n",
       "2  {'answer_start': [25, 25, 36], 'text': ['Pitts...   \n",
       "3  {'answer_start': [192, 192, 204], 'text': ['Ne...   \n",
       "4  {'answer_start': [25, 21, 36], 'text': ['Pitts...   \n",
       "\n",
       "                                             context  \\\n",
       "0  The Panthers finished the regular season with ...   \n",
       "1  The Panthers finished the regular season with ...   \n",
       "2  The Broncos defeated the Pittsburgh Steelers i...   \n",
       "3  The Broncos defeated the Pittsburgh Steelers i...   \n",
       "4  The Broncos defeated the Pittsburgh Steelers i...   \n",
       "\n",
       "                 insert_date_                        id          title  \\\n",
       "0  2020-10-02T07:47:07.758767  56d6017d1c85041400946ec1  Super_Bowl_50   \n",
       "1  2020-10-02T07:47:07.758541  56d2045de7d4791d009025f6  Super_Bowl_50   \n",
       "2  2020-10-02T07:47:12.209038  56d99f99dc89441400fdb628  Super_Bowl_50   \n",
       "3  2020-10-02T07:47:12.208876  56d7018a0d65d214001982c5  Super_Bowl_50   \n",
       "4  2020-10-02T07:47:11.956089  56d7018a0d65d214001982c2  Super_Bowl_50   \n",
       "\n",
       "   _search_score  \n",
       "0       1.000000  \n",
       "1       0.960072  \n",
       "2       0.923735  \n",
       "3       0.915792  \n",
       "4       0.906187  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#5 recommendation by id: search with the _id of one of the documents\n",
    "seed_id = documents[50]['_id']\n",
    "search_by_id_results = vi_client.search_by_id(\n",
    "    collection_name, seed_id, 'question_vector_', page_size=5\n",
    ")\n",
    "\n",
    "#5.2 the first result is the seed document itself\n",
    "vi_client.results_to_df(search_by_id_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>_id</th>\n",
       "      <th>question</th>\n",
       "      <th>answers</th>\n",
       "      <th>context</th>\n",
       "      <th>insert_date_</th>\n",
       "      <th>id</th>\n",
       "      <th>title</th>\n",
       "      <th>_search_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>258</td>\n",
       "      <td>How old was Peyton Manning in 2015?</td>\n",
       "      <td>{'answer_start': [817, 817, 817], 'text': ['39...</td>\n",
       "      <td>Following their loss in the divisional round o...</td>\n",
       "      <td>2020-10-02T07:47:11.000830</td>\n",
       "      <td>56bf301c3aeaaa14008c9550</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.641220</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>276</td>\n",
       "      <td>How may yards did Peyton Manning throw?</td>\n",
       "      <td>{'answer_start': [77, 77, 77], 'text': ['2,249...</td>\n",
       "      <td>Manning finished the year with a career-low 67...</td>\n",
       "      <td>2020-10-02T07:47:11.239195</td>\n",
       "      <td>56bf38383aeaaa14008c956c</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.634783</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>270</td>\n",
       "      <td>What was Peyton Manning's passer rating for th...</td>\n",
       "      <td>{'answer_start': [44, 44, 44], 'text': ['67.9'...</td>\n",
       "      <td>Manning finished the year with a career-low 67...</td>\n",
       "      <td>2020-10-02T07:47:11.238646</td>\n",
       "      <td>56beb57b3aeaaa14008c9279</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.617874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>252</td>\n",
       "      <td>Who did Peyton Manning play for as a rookie?</td>\n",
       "      <td>{'answer_start': [641, 637, 654], 'text': ['In...</td>\n",
       "      <td>Following their loss in the divisional round o...</td>\n",
       "      <td>2020-10-02T07:47:10.760423</td>\n",
       "      <td>56beb4e43aeaaa14008c9267</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.612926</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>356</td>\n",
       "      <td>Peyton Manning took how many different teams t...</td>\n",
       "      <td>{'answer_start': [57, 57, 57, 57], 'text': ['t...</td>\n",
       "      <td>Peyton Manning became the first quarterback ev...</td>\n",
       "      <td>2020-10-02T07:47:12.428915</td>\n",
       "      <td>56d704430d65d214001982de</td>\n",
       "      <td>Super_Bowl_50</td>\n",
       "      <td>0.611716</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   _id                                           question  \\\n",
       "0  258                How old was Peyton Manning in 2015?   \n",
       "1  276            How may yards did Peyton Manning throw?   \n",
       "2  270  What was Peyton Manning's passer rating for th...   \n",
       "3  252       Who did Peyton Manning play for as a rookie?   \n",
       "4  356  Peyton Manning took how many different teams t...   \n",
       "\n",
       "                                             answers  \\\n",
       "0  {'answer_start': [817, 817, 817], 'text': ['39...   \n",
       "1  {'answer_start': [77, 77, 77], 'text': ['2,249...   \n",
       "2  {'answer_start': [44, 44, 44], 'text': ['67.9'...   \n",
       "3  {'answer_start': [641, 637, 654], 'text': ['In...   \n",
       "4  {'answer_start': [57, 57, 57, 57], 'text': ['t...   \n",
       "\n",
       "                                             context  \\\n",
       "0  Following their loss in the divisional round o...   \n",
       "1  Manning finished the year with a career-low 67...   \n",
       "2  Manning finished the year with a career-low 67...   \n",
       "3  Following their loss in the divisional round o...   \n",
       "4  Peyton Manning became the first quarterback ev...   \n",
       "\n",
       "                 insert_date_                        id          title  \\\n",
       "0  2020-10-02T07:47:11.000830  56bf301c3aeaaa14008c9550  Super_Bowl_50   \n",
       "1  2020-10-02T07:47:11.239195  56bf38383aeaaa14008c956c  Super_Bowl_50   \n",
       "2  2020-10-02T07:47:11.238646  56beb57b3aeaaa14008c9279  Super_Bowl_50   \n",
       "3  2020-10-02T07:47:10.760423  56beb4e43aeaaa14008c9267  Super_Bowl_50   \n",
       "4  2020-10-02T07:47:12.428915  56d704430d65d214001982de  Super_Bowl_50   \n",
       "\n",
       "   _search_score  \n",
       "0       0.641220  \n",
       "1       0.634783  \n",
       "2       0.617874  \n",
       "3       0.612926  \n",
       "4       0.611716  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#6 hybrid search combining traditional and nlp vector search\n",
    "# the same query text is passed both raw and as an encoded vector\n",
    "query_text = 'Peyton Men'\n",
    "search_results = vi_client.hybrid_search(\n",
    "    collection_name,\n",
    "    query_text,\n",
    "    text_encoder.encode(query_text),\n",
    "    ['question_vector_'],\n",
    "    ['question'],\n",
    "    traditional_weight=0.015,\n",
    "    page_size=5,\n",
    ")\n",
    "vi_client.results_to_df(search_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Edit Metadata",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
