1
0
mirror of https://github.com/algora-io/tv.git synced 2024-11-16 00:58:59 +02:00

add vector search for vods (#28)

This commit is contained in:
Zafer Cesur 2024-05-01 23:34:05 +03:00 committed by GitHub
parent 1dcfd77dcb
commit 149191a9cf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 1616 additions and 45 deletions

View File

@ -89,7 +89,7 @@
# If you don't want TODO comments to cause `mix credo` to fail, just
# set this value to 0 (zero).
#
{Credo.Check.Design.TagTODO, [exit_status: 2]},
{Credo.Check.Design.TagTODO, [exit_status: 0]},
#
## Readability Checks

View File

@ -7,6 +7,11 @@ AWS_ENDPOINT_URL_S3="https://fly.storage.tigris.dev"
AWS_REGION="auto"
AWS_ACCESS_KEY_ID=""
AWS_SECRET_ACCESS_KEY=""
BUCKET_NAME=""
BUCKET_MEDIA=""
BUCKET_ML=""
REPLICATE_API_TOKEN=""
HF_TOKEN=""
EVENT_SINK_URL=""

2
.gitignore vendored
View File

@ -38,4 +38,4 @@ npm-debug.log
# Ignore local files
/tmp
/.local
/priv/cache
/priv/cache

View File

@ -1,7 +1,7 @@
import Ecto.Query
import Ecto.Changeset
alias Algora.{Accounts, Library, Repo, Storage}
alias Algora.{Accounts, Library, Repo, Storage, Cache, ML}
IEx.configure(inspect: [charlists: :as_lists])

View File

@ -18,7 +18,7 @@ ARG RUNNER_IMAGE="debian:bookworm-20231009-slim"
FROM ${BUILDER_IMAGE} as builder
# install build dependencies
RUN apt-get update -y && apt-get install -y build-essential git && apt-get clean && rm -f /var/lib/apt/lists/*_*
RUN apt-get update -y && apt-get install -y build-essential git curl && apt-get clean && rm -f /var/lib/apt/lists/*_*
# prepare build dir
WORKDIR /app

View File

@ -16,7 +16,7 @@
@apply border-gray-800;
}
body {
@apply bg-gray-950 text-white;
@apply bg-[radial-gradient(ellipse_at_top_left,_#1d1e3a_0%,_#050217_40%,_#050217_60%,_#1d1e3a_100%)] text-white;
}
}

View File

@ -144,9 +144,15 @@ const Hooks = {
const { player } = detail;
this.player.options({
techOrder: [player.type === "video/youtube" ? "youtube" : "html5"],
...(player.currentTime && player.type === "video/youtube"
? { youtube: { customVars: { start: player.currentTime } } }
: {}),
});
this.player.src({ src: player.src, type: player.type });
this.player.play();
if (player.currentTime && player.type !== "video/youtube") {
this.player.currentTime(player.currentTime);
}
this.player.el().parentElement.classList.remove("hidden");
this.player.el().parentElement.classList.add("flex");
@ -275,11 +281,33 @@ let liveSocket = new LiveSocket("/live", Socket, {
// Runs after every route change: docks the video player inline on a
// "/<handle>/<numeric id>" page and shows it as a picture-in-picture
// overlay everywhere else. Does nothing when the player is not in the DOM.
let routeUpdated = () => {
  // TODO: uncomment
  // Focus.focusMain();

  const container = document.querySelector("#video-player")?.parentElement;
  if (!container) {
    return;
  }

  const pipClasses = [
    "fixed",
    "bottom-0",
    "right-0",
    "z-[1000]",
    "w-[100vw]",
    "sm:w-[30vw]",
  ];

  const { pathname } = new URL(window.location.href);
  const onVideoPage = /^\/[^\/]+\/\d+$/.test(pathname);

  // toggle(token, force) adds the class when force is true and removes it otherwise.
  container.classList.toggle("lg:pr-[24rem]", onVideoPage);
  pipClasses.forEach((cls) => container.classList.toggle(cls, !onVideoPage));
};
// Show progress bar on live navigation and form submits
topbar.config({
barColors: { 0: "rgba(147, 51, 234, 1)" },
barColors: { 0: "rgba(79, 70, 229, 1)" },
shadowColor: "rgba(0, 0, 0, .3)",
});
window.addEventListener("phx:page-loading-start", (info) =>

View File

@ -33,8 +33,7 @@ config :algora, AlgoraWeb.Embed.Endpoint,
]
config :algora, Oban,
repo: Algora.Repo,
notifier: Oban.Notifiers.PG,
repo: Algora.Repo.Local,
queues: [default: 10]
config :esbuild,
@ -66,6 +65,8 @@ config :logger, :console,
# Use Jason for JSON parsing in Phoenix
config :phoenix, :json_library, Jason
config :nx, default_backend: EXLA.Backend
# Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above.
import_config "#{config_env()}.exs"

View File

@ -1,6 +1,10 @@
import Config
config :algora, :files, bucket: System.get_env("BUCKET_NAME")
config :algora, mode: :dev
config :algora, :buckets,
media: System.get_env("BUCKET_MEDIA"),
ml: System.get_env("BUCKET_ML")
config :algora, :github,
client_id: System.get_env("GITHUB_CLIENT_ID"),

View File

@ -1,5 +1,7 @@
import Config
config :algora, mode: :prod
# For production, don't forget to configure the url host
# to something meaningful, Phoenix uses this information
# when generating URLs.

View File

@ -12,6 +12,12 @@ if System.get_env("PHX_SERVER") && System.get_env("RELEASE_NAME") do
config :algora, AlgoraWeb.Embed.Endpoint, server: true
end
config :algora,
hf_token: System.get_env("HF_TOKEN")
config :replicate,
replicate_api_token: System.get_env("REPLICATE_API_TOKEN")
if config_env() == :prod do
database_url =
System.get_env("DATABASE_URL") ||
@ -80,7 +86,9 @@ if config_env() == :prod do
],
secret_key_base: secret_key_base
config :algora, :files, bucket: System.fetch_env!("BUCKET_NAME")
config :algora, :buckets,
media: System.get_env("BUCKET_MEDIA"),
ml: System.get_env("BUCKET_ML")
config :algora, :github,
client_id: System.fetch_env!("GITHUB_CLIENT_ID"),

38
lib/algora/cache.ex Normal file
View File

@ -0,0 +1,38 @@
defmodule Algora.Cache do
  @moduledoc """
  A small file-backed cache.

  Values are serialized with `:erlang.term_to_binary/1` and stored under a
  `cache` directory, rooted at `/data` when the app runs in `:prod` mode and
  at the application's priv directory otherwise.
  """

  @doc """
  Runs `f`, stores its result under `key` and returns the result.

  Any previously cached value for `key` is overwritten.
  """
  def refetch(key, f) do
    result = f.()
    key |> path() |> write(result)
    result
  end

  @doc """
  Returns the value cached under `key`, computing and caching it with `f`
  when the cache file is missing, unreadable or cannot be decoded.
  """
  def fetch(key, f) do
    case key |> path() |> read() do
      {:ok, result} -> result
      {:error, _} -> refetch(key, f)
    end
  end

  @doc """
  Returns the file path used for `key`.

  `key` is split on `/` and every part is slugified, so `"a/b"` maps to
  `<dir>/cache/a/b`.
  """
  def path(key) do
    segments = key |> String.split("/") |> Enum.map(&Slug.slugify/1)

    dir =
      case Algora.config([:mode]) do
        :prod -> "/data"
        _ -> :code.priv_dir(:algora)
      end

    Path.join([dir, "cache"] ++ segments)
  end

  # Best-effort write: the result of File.write/2 is returned as-is and the
  # callers above ignore it, so a failed write only costs a later refetch.
  defp write(path, content) do
    File.mkdir_p!(Path.dirname(path))
    File.write(path, :erlang.term_to_binary(content))
  end

  defp read(path) do
    with {:ok, binary} <- File.read(path) do
      decode(binary)
    end
  end

  # A truncated or otherwise corrupt file makes binary_to_term/1 raise
  # ArgumentError. Report it as a miss so fetch/2 recomputes and overwrites
  # the entry instead of crashing on every lookup.
  defp decode(binary) do
    {:ok, :erlang.binary_to_term(binary)}
  rescue
    ArgumentError -> {:error, :invalid_cache}
  end
end

View File

@ -7,8 +7,8 @@ defmodule Algora.Library do
import Ecto.Query, warn: false
import Ecto.Changeset
alias Algora.Accounts.User
alias Algora.{Repo, Accounts, Storage}
alias Algora.Library.{Channel, Video, Events, Subtitle}
alias Algora.{Repo, Accounts, Storage, Cache, ML}
alias Algora.Library.{Channel, Video, Events, Subtitle, Segment}
@pubsub Algora.PubSub
@ -183,19 +183,44 @@ defmodule Algora.Library do
join: u in User,
on: v.user_id == u.id,
where: u.id == ^user.id,
select_merge: %{channel_handle: u.handle, channel_name: u.name},
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
},
order_by: [desc: v.inserted_at],
limit: 1
)
|> Repo.one()
end
@doc """
Extracts the audio track of `video` to an mp3, uploads it next to the video
and starts (or reuses a cached) asynchronous transcription of it.

`cb` receives progress maps of the form `%{stage: _, done: _, total: _}`.
Raises if ffmpeg exits with a non-zero status.
"""
def transcribe_video(%Video{} = video, cb) do
  dir = Path.join(System.tmp_dir!(), video.uuid)
  File.mkdir_p!(dir)
  mp3_local_path = Path.join(dir, "index.mp3")

  cb.(%{stage: :transmuxing, done: 1, total: 1})

  try do
    # -y: overwrite a leftover file from an earlier failed run instead of
    # letting ffmpeg abort at its interactive "overwrite?" prompt.
    case System.cmd("ffmpeg", ["-y", "-i", video.url, "-vn", mp3_local_path]) do
      {_output, 0} -> :ok
      {_output, status} -> raise "ffmpeg exited with status #{status}"
    end

    Storage.upload_from_filename(mp3_local_path, "#{video.uuid}/index.mp3", cb)
  after
    # Always clean up, including when ffmpeg or the upload raised.
    File.rm(mp3_local_path)
  end

  Cache.fetch("#{Video.slug(video)}/transcription", fn ->
    ML.transcribe_video_async("#{video.url_root}/index.mp3")
  end)
end
def get_mp4_video(id) do
from(v in Video,
where: v.format == :mp4 and (v.transmuxed_from_id == ^id or v.id == ^id),
join: u in User,
on: v.user_id == u.id,
select_merge: %{channel_handle: u.handle, channel_name: u.name}
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
}
)
|> Repo.one()
end
@ -394,12 +419,41 @@ defmodule Algora.Library do
v.visibility == :public and
is_nil(v.vertical_thumbnail_url) and
(v.is_live == true or v.duration >= 120 or v.type == :vod),
select_merge: %{channel_handle: u.handle, channel_name: u.name}
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
}
)
|> order_by_inserted(:desc)
|> Repo.all()
end
@doc """
Returns the videos with the given `ids`, in the same order as `ids`.

Ids without a matching video are skipped. Each video carries the owning
user's handle, name and avatar URL in its `channel_*` virtual fields.
"""
def list_videos_by_ids(ids) do
  # Index the rows by id once so restoring the caller's order is a map
  # lookup per id rather than a scan of the whole result per id.
  videos_by_id =
    from(v in Video,
      join: u in User,
      on: v.user_id == u.id,
      select_merge: %{
        channel_handle: u.handle,
        channel_name: u.name,
        channel_avatar_url: u.avatar_url
      },
      where: v.id in ^ids
    )
    |> Repo.all()
    |> Map.new(&{&1.id, &1})

  ids
  |> Enum.map(&Map.get(videos_by_id, &1))
  |> Enum.reject(&is_nil/1)
end
def list_shorts(limit \\ 100) do
from(v in Video,
join: u in User,
@ -409,7 +463,11 @@ defmodule Algora.Library do
not is_nil(v.url) and
is_nil(v.transmuxed_from_id) and v.visibility == :public and
not is_nil(v.vertical_thumbnail_url),
select_merge: %{channel_handle: u.handle, channel_name: u.name}
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
}
)
|> order_by_inserted(:desc)
|> Repo.all()
@ -420,7 +478,11 @@ defmodule Algora.Library do
limit: ^limit,
join: u in User,
on: v.user_id == u.id,
select_merge: %{channel_handle: u.handle, channel_name: u.name},
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
},
where:
not is_nil(v.url) and
is_nil(v.transmuxed_from_id) and
@ -435,10 +497,11 @@ defmodule Algora.Library do
limit: ^limit,
join: u in assoc(v, :user),
left_join: m in assoc(v, :messages),
group_by: [v.id, u.handle, u.name],
group_by: [v.id, u.handle, u.name, u.avatar_url],
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url,
messages_count: count(m.id)
},
where:
@ -489,7 +552,11 @@ defmodule Algora.Library do
where: v.id == ^id,
join: u in User,
on: v.user_id == u.id,
select_merge: %{channel_handle: u.handle, channel_name: u.name}
select_merge: %{
channel_handle: u.handle,
channel_name: u.name,
channel_avatar_url: u.avatar_url
}
)
|> Repo.one!()
@ -513,6 +580,22 @@ defmodule Algora.Library do
def topic_studio(), do: "studio"
@doc """
Returns all segments of `video`, ordered by ascending start time.
"""
def list_segments(%Video{id: video_id}) do
  Segment
  |> where([s], s.video_id == ^video_id)
  |> order_by([s], asc: s.start)
  |> Repo.all()
end
@doc """
Returns the segments with the given `ids`, in the same order as `ids`.

Ids without a matching segment are skipped.
"""
def list_segments_by_ids(ids) do
  # Index the rows by id once; the database does not return them in the
  # order of `ids`, so the order is restored with one map lookup per id.
  segments_by_id =
    from(s in Segment, where: s.id in ^ids)
    |> Repo.all()
    |> Map.new(&{&1.id, &1})

  ids
  |> Enum.map(&Map.get(segments_by_id, &1))
  |> Enum.reject(&is_nil/1)
end
def list_subtitles(%Video{} = video) do
from(s in Subtitle, where: s.video_id == ^video.id, order_by: [asc: s.start])
|> Repo.all()

View File

@ -0,0 +1,42 @@
defmodule Algora.Library.Segment do
  @moduledoc """
  A contiguous stretch of a video's subtitles, stored with its text, time
  range and embedding.
  """

  alias Algora.Library
  alias Algora.Library.{Segment, Subtitle}
  use Ecto.Schema
  import Ecto.Changeset

  schema "segments" do
    field :start, :float
    field :end, :float
    field :body, :string
    field :embedding, {:array, :float}

    belongs_to :video, Library.Video
    belongs_to :starting_subtitle, Library.Subtitle
    belongs_to :ending_subtitle, Library.Subtitle

    timestamps()
  end

  @doc false
  def changeset(segment, attrs) do
    fields = [:body, :start, :end]

    segment
    |> cast(attrs, fields)
    |> validate_required(fields)
  end

  @doc """
  Builds an unsaved segment spanning the given subtitles.

  The body is the concatenation of the subtitle bodies; the time range and
  video id are taken from the first and last subtitle. Returns `nil` for an
  empty list.
  """
  def init([]), do: nil

  def init(subtitles) do
    text = Enum.map_join(subtitles, "", fn %Subtitle{body: piece} -> piece end)
    first = Enum.at(subtitles, 0)
    last = Enum.at(subtitles, -1)

    %Segment{
      body: text,
      start: first.start,
      end: last.end,
      video_id: first.video_id,
      starting_subtitle_id: first.id,
      ending_subtitle_id: last.id
    }
  end
end

View File

@ -24,6 +24,7 @@ defmodule Algora.Library.Video do
field :filename, :string
field :channel_handle, :string, virtual: true
field :channel_name, :string, virtual: true
field :channel_avatar_url, :string, virtual: true
field :messages_count, :integer, virtual: true, default: 0
field :visibility, Ecto.Enum, values: [public: 1, unlisted: 2]
field :remote_path, :string
@ -48,15 +49,26 @@ defmodule Algora.Library.Video do
put_assoc(changeset, :user, user)
end
# Assigns a freshly generated UUID and the matching `url_root` to a valid
# changeset; an invalid changeset is returned untouched.
def put_video_uuid(%Ecto.Changeset{valid?: true} = changeset) do
  new_uuid = Ecto.UUID.generate()

  changeset
  |> put_change(:uuid, new_uuid)
  |> put_change(:url_root, url_root(new_uuid))
end

def put_video_uuid(%Ecto.Changeset{} = changeset), do: changeset
def put_video_meta(%Ecto.Changeset{} = changeset, format, basename \\ "index")
when format in [:mp4, :hls] do
if changeset.valid? do
uuid = Ecto.UUID.generate()
filename = "#{basename}#{fileext(format)}"
changeset
|> put_video_uuid()
|> put_change(:filename, filename)
|> put_change(:uuid, uuid)
else
changeset
end
@ -70,7 +82,6 @@ defmodule Algora.Library.Video do
changeset
|> put_change(:url, url(uuid, filename))
|> put_change(:url_root, url_root(uuid))
|> put_change(:remote_path, "#{uuid}/#{filename}")
else
changeset
@ -81,10 +92,14 @@ defmodule Algora.Library.Video do
defp fileext(:hls), do: ".m3u8"
defp url_root(uuid) do
bucket = Algora.config([:files, :bucket])
bucket = Algora.config([:buckets, :media])
%{scheme: scheme, host: host} = Application.fetch_env!(:ex_aws, :s3) |> Enum.into(%{})
"#{scheme}#{host}/#{bucket}/#{uuid}"
end
defp url(uuid, filename), do: "#{url_root(uuid)}/#{filename}"
# Builds a URL-friendly slug from the video's id and title.
def slug(%Video{} = video) do
  Slug.slugify("#{video.id}-#{video.title}")
end
# Returns the part of the slug before the first "-" (the id, as a string).
def id_from_slug(slug) do
  [id | _rest] = String.split(slug, "-")
  id
end
end

199
lib/algora/ml.ex Normal file
View File

@ -0,0 +1,199 @@
defmodule Algora.ML do
  @moduledoc """
  Machine-learning helpers: transcription and embeddings through Replicate,
  a persisted HNSWLib vector index, and token-bounded chunking of subtitles
  into `Algora.Library.Segment`s.
  """

  alias Replicate.Predictions
  alias Replicate.Predictions.Prediction
  alias Algora.{Storage, Library}

  # Maximum number of tokens, as measured by the tokenizer, allowed per chunk.
  @chunk_size 128

  @mistral "mistralai/Mixtral-8x7B-Instruct-v0.1"
  @mpnet "replicate/all-mpnet-base-v2"
  @whisper "vaibhavs10/incredibly-fast-whisper"

  # @mistral_version ""
  @mpnet_version "b6b7585c9640cd7a9572c6e129c9549d79c9c31f0d3fdce7baac7c67ca38f305"
  @whisper_version "3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c"

  @doc "Fetches the Replicate prediction with the given `id`, raising on failure."
  def get!(id), do: Predictions.get!(id)

  defp index_local_dir(), do: Path.join(System.tmp_dir!(), "algora/hnswlib")

  defp index_local_path(), do: Path.join(index_local_dir(), "index")

  @doc """
  Saves `index` to its local path and uploads that file as `"index"` to the
  `:ml` bucket.
  """
  def save_index(index) do
    local_path = index_local_path()
    HNSWLib.Index.save_index(index, local_path)
    Storage.upload_from_filename_to_bucket(local_path, "index", :ml)
  end

  @doc """
  Loads the vector index, downloading it from the `:ml` bucket first when no
  local copy exists. Raises if the download fails.
  """
  def load_index!() do
    local_path = index_local_path()

    if !File.exists?(local_path) do
      File.mkdir_p!(index_local_dir())

      {:ok, _} =
        ExAws.S3.download_file(Algora.config([:buckets, :ml]), "index", local_path)
        |> ExAws.request()
    end

    load_index_from_disk!(local_path)
  end

  # Loads the 768-dimensional cosine index from `path`; when loading fails a
  # new empty index is created, saved and returned instead.
  # NOTE(review): the loaded index is capped at 100_000 elements while a new
  # one is created with 1_000_000 - confirm which limit is intended.
  defp load_index_from_disk!(path) do
    case HNSWLib.Index.load_index(:cosine, 768, path, max_elements: 100_000) do
      {:ok, index} ->
        index

      {:error, _} ->
        {:ok, index} = HNSWLib.Index.new(:cosine, 768, 1_000_000)
        save_index(index)
        index
    end
  end

  @doc """
  Adds the embedding of every segment to `index`, labelled with the segment
  id, then saves the index.
  """
  def add_embeddings(index, segments) do
    for %Library.Segment{id: id, embedding: embedding} <- segments do
      HNSWLib.Index.add_items(index, Nx.tensor(embedding), ids: [id])
    end

    save_index(index)
  end

  @doc """
  Queries `index` for the 100 nearest neighbours of `embedding` and returns
  the matching segments in the order the index returned them.
  """
  def get_relevant_chunks(index, embedding) do
    {:ok, labels, _dist} =
      HNSWLib.Index.knn_query(index, Nx.tensor(embedding), k: 100)

    labels |> Nx.to_flat_list() |> Library.list_segments_by_ids()
  end

  @doc """
  Starts a transcription of the audio at `path` without waiting for it.

  Returns whatever `run_async/3` returns.
  """
  def transcribe_video_async(path) do
    run_async(
      @whisper,
      @whisper_version,
      audio: path,
      language: "english",
      timestamp: "chunk",
      batch_size: 64,
      diarise_audio: false
    )
  end

  @doc "Transcribes the audio at `path` through `run/3`."
  def transcribe_video(path) do
    run(
      @whisper,
      @whisper_version,
      audio: path,
      language: "english",
      timestamp: "chunk",
      batch_size: 64,
      diarise_audio: false
    )
  end

  @doc "Creates an embedding for a single `text` through `run/3`."
  def create_embedding(text) do
    run(@mpnet, @mpnet_version, text: text)
  end

  @doc "Creates embeddings for the bodies of `segments` in one batch through `run/3`."
  def create_embeddings(segments) do
    run(@mpnet, @mpnet_version, text_batch: encode_bodies(segments))
  end

  @doc "Like `create_embeddings/1`, but through `run_async/3`."
  def create_embeddings_async(segments) do
    run_async(@mpnet, @mpnet_version, text_batch: encode_bodies(segments))
  end

  # JSON-encodes the list of segment bodies, as passed in `text_batch`.
  defp encode_bodies(segments) do
    segments
    |> Enum.map(fn %Library.Segment{body: body} -> body end)
    |> Jason.encode!()
  end

  @doc false
  def test do
    Regex.named_captures(
      ~r/^(?P<model>[^\/]+\/[^:]+):(?P<version>.+)$/,
      "replicate/all-mpnet-base-v2"
    )
  end

  @doc "Runs `model` at `version` with `input` via `Replicate.run/2`."
  def run(model, version, input) do
    Replicate.run("#{model}:#{version}", input)
  end

  @doc """
  Creates a prediction for `model` at `version` with `input`.

  Returns the `Prediction` on success and `{:error, message}` otherwise.
  """
  def run_async(model, version, input) do
    model = Replicate.Models.get!(model)
    version = Replicate.Models.get_version!(model, version)

    case Predictions.create(version, input) do
      {:ok, %Prediction{} = prediction} -> prediction
      {:error, message} -> {:error, message}
    end
  end

  @doc """
  Downloads the URL stored in the prediction's `output` and JSON-decodes the
  response body.
  """
  def fetch_output!(%Prediction{output: output}) do
    {:ok, resp} = Finch.build(:get, output) |> Finch.request(Algora.Finch)
    Jason.decode!(resp.body)
  end

  @doc "Loads the tokenizer used to measure chunk sizes. Raises on failure."
  def load_tokenizer!() do
    {:ok, tokenizer} =
      Bumblebee.load_tokenizer({:hf, @mistral, auth_token: Algora.config([:hf_token])}, [
        {:type, :llama}
      ])

    tokenizer
  end

  @doc """
  Returns the number of tokens in a segment's body, or in the segment built
  from a list of subtitles.
  """
  def tokenize_and_measure(%Library.Segment{body: body}, tokenizer) do
    %{"input_ids" => tensor} = Bumblebee.apply_tokenizer(tokenizer, body)
    {1, len} = Nx.shape(tensor)
    len
  end

  def tokenize_and_measure(subtitles, tokenizer) do
    Library.Segment.init(subtitles) |> tokenize_and_measure(tokenizer)
  end

  @doc "Formats a segment as a timestamp and subtitle id range followed by its body."
  def format_segment(%Library.Segment{start: start, body: body} = segment),
    do:
      "#{Library.to_hhmmss(start)} - [#{segment.starting_subtitle_id}, #{segment.ending_subtitle_id}]\n#{body}"

  @doc """
  Splits the subtitles of `video` into unsaved segments of at most
  #{@chunk_size} tokens each.
  """
  def chunk(video) do
    subtitles = Library.list_subtitles(video)

    chunk(load_tokenizer!(), [], [], subtitles)
    |> Enum.map(&Library.Segment.init/1)
  end

  @doc false
  # chunk/4 accumulates finished chunks in `chunks` (newest first) and the
  # chunk being built in `chunk` (newest subtitle first).
  def chunk(_, chunks, [], []), do: Enum.reverse(chunks)

  # Flush the last in-progress chunk. It is stored newest-first, so it has to
  # be reversed like every other finished chunk; otherwise the final segment
  # gets its body joined backwards and its start/end swapped.
  def chunk(tokenizer, chunks, chunk, []),
    do: chunk(tokenizer, [Enum.reverse(chunk) | chunks], [], [])

  def chunk(tokenizer, chunks, chunk, [subtitle | subtitles]) do
    new_chunk = [subtitle | chunk]

    # Measure the subtitles in playback order, i.e. exactly the text that
    # would be emitted for this chunk.
    valid? = new_chunk |> Enum.reverse() |> tokenize_and_measure(tokenizer) <= @chunk_size

    cond do
      valid? ->
        chunk(tokenizer, chunks, new_chunk, subtitles)

      # A single subtitle that exceeds the limit on its own is dropped.
      chunk == [] ->
        chunk(
          tokenizer,
          chunks,
          [],
          subtitles
        )

      # The chunk is full: emit it and retry the current subtitle, carrying
      # over up to two of the most recent subtitles as overlap.
      true ->
        chunk(
          tokenizer,
          [Enum.reverse(chunk) | chunks],
          chunk |> Enum.take(min(2, length(chunk) - 1)),
          [subtitle | subtitles]
        )
    end
  end
end

View File

@ -83,13 +83,19 @@ defmodule Algora.Storage do
{:ok, state}
end
def upload(contents, remote_path, opts \\ []) do
Algora.config([:files, :bucket])
def upload_to_bucket(contents, remote_path, bucket, opts \\ []) do
Algora.config([:buckets, bucket])
|> ExAws.S3.put_object(remote_path, contents, opts)
|> ExAws.request([])
end
def upload_from_filename(local_path, remote_path, cb \\ fn _ -> nil end, opts \\ []) do
def upload_from_filename_to_bucket(
local_path,
remote_path,
bucket,
cb \\ fn _ -> nil end,
opts \\ []
) do
%{size: size} = File.stat!(local_path)
chunk_size = 5 * 1024 * 1024
@ -99,10 +105,24 @@ defmodule Algora.Storage do
cb.(%{stage: :persisting, done: chunk_size, total: size})
chunk
end)
|> ExAws.S3.upload(Algora.config([:files, :bucket]), remote_path, opts)
|> ExAws.S3.upload(Algora.config([:buckets, bucket]), remote_path, opts)
|> ExAws.request([])
end
# Uploads `contents` to `remote_path` in the `:media` bucket.
def upload(contents, remote_path, opts \\ []),
  do: upload_to_bucket(contents, remote_path, :media, opts)
# Uploads the file at `local_path` to `remote_path` in the `:media` bucket,
# passing the progress callback and options through unchanged.
def upload_from_filename(local_path, remote_path, cb \\ fn _ -> nil end, opts \\ []) do
  upload_from_filename_to_bucket(local_path, remote_path, :media, cb, opts)
end
defp broadcast!(topic, msg) do
Phoenix.PubSub.broadcast!(@pubsub, topic, {__MODULE__, msg})
end

86
lib/algora/util.ex Normal file
View File

@ -0,0 +1,86 @@
defmodule Algora.Util do
  @moduledoc """
  Small shared helpers.
  """

  # Lowercase words that are too common to be meaningful search matches.
  # Kept in a MapSet so membership checks do not scan the whole list.
  @common_words MapSet.new(~w(
    a add again air also an and are as ask at be but by can do does each end
    even for from get got had have he here his how i if in is it kind men
    must my near need of off on one or other our out put said self set some
    such tell that the their they this to try us use want was we're we well
    went were what which why will with you're you your
  ))

  @doc """
  Returns `true` when `s` is one of the known common words.

  The comparison is exact: callers are expected to pass lowercase input.

  ## Examples

      iex> Algora.Util.common_word?("the")
      true

      iex> Algora.Util.common_word?("vector")
      false
  """
  def common_word?(s), do: MapSet.member?(@common_words, s)

  @doc """
  Same as `common_word?/1`; kept so existing callers continue to work.
  """
  def is_common_word(s), do: common_word?(s)
end

View File

@ -0,0 +1,75 @@
defmodule Algora.Workers.Transcriber do
  @moduledoc """
  Oban worker that transcribes a video.

  The transcription runs in a task that reports back to the job process with
  `{:progress, _}`, `{:complete, _}` and `{:error, _, _}` messages; the job
  process turns those into broadcasts through `Algora.Library`.
  """

  use Oban.Worker, queue: :default, max_attempts: 1, unique: [period: 86_400]

  alias Algora.Library
  alias Replicate.Predictions.Prediction

  import Ecto.Query, warn: false
  require Logger

  # Prediction statuses after which polling can never succeed.
  @failed_statuses ["failed", "canceled"]

  @impl Oban.Worker
  def perform(%Oban.Job{args: %{"video_id" => video_id}} = job) do
    video = Library.get_video!(video_id)
    build_transcriber(job, video)
    await_transcriber(video)
  end

  # Starts the transcription task. Progress, completion and errors are sent
  # to the job process as messages; errors are re-raised after being reported.
  defp build_transcriber(job, %Library.Video{} = video) do
    job_pid = self()
    report = fn progress -> send(job_pid, {:progress, progress}) end

    Task.async(fn ->
      try do
        prediction = Library.transcribe_video(video, report)

        case await_prediction(prediction.id, report) do
          # A failed prediction must not be reported as a completed one.
          {:error, reason} ->
            raise "transcription failed: #{inspect(reason)}"

          output ->
            Logger.debug(fn -> "Transcription output: #{inspect(output)}" end)
        end

        send(job_pid, {:complete, video})
      rescue
        e ->
          send(job_pid, {:error, e, job})
          reraise e, __STACKTRACE__
      end
    end)
  end

  # Polls the prediction once per second until it reaches a final status.
  # Returns the JSON-decoded output on success and an error term otherwise.
  defp await_prediction(id, cb) do
    case Replicate.Predictions.get(id) do
      {:ok, %Prediction{status: "succeeded", output: output}} ->
        {:ok, resp} = Finch.build(:get, output) |> Finch.request(Algora.Finch)
        Jason.decode!(resp.body)

      # Without this clause a failed or canceled prediction is polled forever.
      {:ok, %Prediction{status: status}} when status in @failed_statuses ->
        {:error, {:prediction, status}}

      {:ok, %Prediction{logs: logs}} ->
        cb.(%{stage: last_log_line(logs), done: 1, total: 1})
        :timer.sleep(1000)
        await_prediction(id, cb)

      error ->
        error
    end
  end

  # The last log line doubles as the progress stage; guard against a
  # prediction that carries no logs.
  defp last_log_line(nil), do: ""
  defp last_log_line(logs), do: logs |> String.split("\n") |> Enum.at(-1)

  # Consumes messages from the transcription task until it completes or
  # fails, broadcasting progress along the way.
  defp await_transcriber(video, stage \\ :retrieving, done \\ 0) do
    receive do
      {:progress, %{stage: stage_now, done: done_now, total: total}} ->
        Library.broadcast_processing_progressed!(stage, video, min(1, done / total))
        done_total = if(stage == stage_now, do: done, else: 0)
        await_transcriber(video, stage_now, done_total + done_now)

      {:complete, video} ->
        Library.broadcast_processing_progressed!(stage, video, 1)
        Library.broadcast_processing_completed!(:transcription, video, video.url)
        {:ok, video.url}

      {:error, e, %Oban.Job{attempt: attempt, max_attempts: max_attempts}} ->
        Library.broadcast_processing_failed!(video, attempt, max_attempts)
        {:error, e}
    end
  end
end

View File

@ -133,7 +133,9 @@ defmodule AlgoraWeb.CoreComponents do
def short_entry(assigns) do
~H"""
<.link class="cursor-pointer truncate" navigate={~p"/#{@video.channel_handle}/#{@video.id}"}>
<%!-- HACK: should use navigate instead of href here --%>
<%!-- but it breaks navigating from youtube video to another video --%>
<.link class="cursor-pointer truncate" href={~p"/#{@video.channel_handle}/#{@video.id}"}>
<.short_thumbnail video={@video} class="rounded-2xl" />
<div class="pt-2 text-base font-semibold truncate"><%= @video.title %></div>
<div class="text-gray-300 text-sm font-medium"><%= @video.channel_name %></div>
@ -146,7 +148,9 @@ defmodule AlgoraWeb.CoreComponents do
def video_entry(assigns) do
~H"""
<.link class="cursor-pointer truncate" navigate={~p"/#{@video.channel_handle}/#{@video.id}"}>
<%!-- HACK: should use navigate instead of href here --%>
<%!-- but it breaks navigating from youtube video to another video --%>
<.link class="cursor-pointer truncate" href={~p"/#{@video.channel_handle}/#{@video.id}"}>
<.video_thumbnail video={@video} class="rounded-2xl" />
<div class="pt-2 text-base font-semibold truncate"><%= @video.title %></div>
<div class="text-gray-300 text-sm font-medium"><%= @video.channel_name %></div>

View File

@ -10,17 +10,37 @@
<%= assigns[:page_title] || "Algora TV" %>
</.live_title>
<%= if assigns[:page_image] do %>
<meta name="description" content={assigns[:page_description]} />
<meta name="twitter:image:src" content={assigns[:page_image]} />
<meta name="twitter:site" content="@algoraio" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content={"#{assigns[:page_title]} | Algora TV"} />
<meta name="twitter:description" content={assigns[:page_description]} />
<meta property="og:image" content={assigns[:page_image]} />
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />
<meta property="og:site_name" content="Algora TV" />
<meta property="og:type" content="website" />
<meta property="og:title" content={"#{assigns[:page_title]} | Algora TV"} />
<meta property="og:url" content={"#{assigns[:page_url]}"} />
<meta property="og:description" content={assigns[:page_description]} />
<% end %>
<%= if assigns[:channel_handle] && assigns[:channel_name] && assigns[:channel_tagline] do %>
<meta name="description" content={assigns[:channel_tagline]} />
<meta
name="twitter:image:src"
content={"#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
content={assigns[:page_image] || "#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
/>
<meta name="twitter:site" content="@algoraio" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content={"#{assigns[:channel_name]} | Algora TV"} />
<meta name="twitter:description" content={assigns[:channel_tagline]} />
<meta property="og:image" content={"#{AlgoraWeb.Endpoint.url()}/images/og/default.png"} />
<meta
property="og:image"
content={assigns[:page_image] || "#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
/>
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />
<meta property="og:site_name" content="Algora TV" />

View File

@ -14,13 +14,16 @@
<meta name="description" content={assigns[:channel_tagline]} />
<meta
name="twitter:image:src"
content={"#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
content={assigns[:page_image] || "#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
/>
<meta name="twitter:site" content="@algoraio" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content={"#{assigns[:channel_name]} | Algora TV"} />
<meta name="twitter:description" content={assigns[:channel_tagline]} />
<meta property="og:image" content={"#{AlgoraWeb.Endpoint.url()}/images/og/default.png"} />
<meta
property="og:image"
content={assigns[:page_image] || "#{AlgoraWeb.Endpoint.url()}/images/og/default.png"}
/>
<meta property="og:image:width" content="1200" />
<meta property="og:image:height" content="630" />
<meta property="og:site_name" content="Algora TV" />

View File

@ -0,0 +1,300 @@
defmodule AlgoraWeb.COSSGPTLive do
use AlgoraWeb, :live_view
alias Algora.{Library, ML, Cache, Util}
# Renders the COSSgpt search page:
#   * the title and a search form that submits the "search" event,
#   * two rows of suggestion buttons that trigger the same event,
#   * a pulsing placeholder grid while @task is set,
#   * the result list once @results is set: one entry per video with its
#     matching segments, where words matching @query_words are highlighted.
@impl true
def render(assigns) do
~H"""
<div class="px-4 py-4 lg:py-8 text-white min-h-screen max-w-7xl mx-auto overflow-hidden">
<h1 class="flex items-center justify-center gap-2 sm:gap-4 text-4xl sm:text-6xl font-bold font-mono text-purple-400 [text-shadow:#000_10px_5px_10px]">
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-8 w-8 sm:h-16 sm:w-16 text-purple-300"
>
<path stroke="none" d="M0 0h24v24H0z" fill="none" /><path d="M16 18a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm0 -12a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm-7 12a6 6 0 0 1 6 -6a6 6 0 0 1 -6 -6a6 6 0 0 1 -6 6a6 6 0 0 1 6 6z" />
</svg>
<span class="text-purple-300">COSS</span><span class="text-purple-400">gpt</span>
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-8 w-8 sm:h-16 sm:w-16 text-purple-400"
>
<path stroke="none" d="M0 0h24v24H0z" fill="none" /><path d="M16 18a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm0 -12a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm-7 12a6 6 0 0 1 6 -6a6 6 0 0 1 -6 -6a6 6 0 0 1 -6 6a6 6 0 0 1 6 6z" />
</svg>
</h1>
<form class="mt-4 sm:mt-8 max-w-lg mx-auto" phx-submit="search">
<label for="query" class="mb-2 text-sm font-medium sr-only text-white">
Search
</label>
<div class="relative">
<div class="absolute inset-y-0 start-0 flex items-center ps-3 pointer-events-none">
<svg
class="w-4 h-4 text-purple-300"
aria-hidden="true"
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 20 20"
>
<path
stroke="currentColor"
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="m19 19-4-4m0-7A7 7 0 1 1 1 8a7 7 0 0 1 14 0Z"
/>
</svg>
</div>
<input
type="search"
id="query"
name="query"
value={@query}
autocomplete="off"
class="w-full p-4 ps-10 text-sm border rounded-lg border-purple-500 bg-white/[5%] placeholder-purple-300 text-white ring-purple-500 ring-1 focus:ring-2 focus:ring-purple-500 focus:outline-none"
placeholder="Ask anything about COSS..."
required
/>
<button
type="submit"
class="text-white absolute end-2.5 bottom-2.5 focus:ring-4 focus:outline-none font-medium rounded-lg text-sm px-4 py-2 bg-purple-600 hover:bg-purple-700 focus:ring-purple-800"
>
Learn
</button>
</div>
</form>
<div class="mt-4 sm:mt-8">
<div class="uppercase text-center text-gray-300 tracking-tight text-xs font-semibold">
Suggestions
</div>
<div class="mt-4 flex flex-wrap gap-2 justify-center mx-auto">
<div
:for={
suggestion_group <- [
[
"Benefits of going open source",
"Business models and pricing",
"Choosing a license",
"How to hire engineers"
],
[
"How to get your first customers",
"B2B startup metrics",
"Setting KPIs and goals",
"How to fundraise",
"Developer marketing"
]
]
}
class="-ml-2 -mt-2 p-2 z-10 flex md:justify-center whitespace-nowrap md:flex-wrap gap-4 overflow-x-auto md:overflow-x-hidden scrollbar-thin"
>
<button
:for={suggestion <- suggestion_group}
phx-click="search"
phx-value-query={suggestion}
class={[
"text-gray-200 font-medium text-sm px-3 py-2 ring-1 hover:ring-2 ring-white/20 shadow-inner inline-flex rounded-lg hover:ring-white/25 hover:bg-white/5 hover:text-white transition-colors",
if(suggestion == @query,
do: "bg-white/5 ring-white/25 hover:ring-white/25",
else: "bg-white/10 ring-white/20"
)
]}
>
<%= suggestion %>
</button>
</div>
</div>
</div>
<div class="space-y-4 lg:space-y-8 mt-4 lg:mt-8">
<div :if={@task} class="flex-1 space-y-4 lg:space-y-8">
<div :for={_ <- 1..2} class="gap-8 hidden lg:flex">
<div class="w-1/2 rounded-2xl aspect-video bg-white/20 animate-pulse"></div>
<div class="w-1/2 rounded-2xl aspect-video bg-white/20 animate-pulse"></div>
</div>
<div :for={_ <- 1..3} class="gap-8 lg:hidden flex">
<div class="w-full rounded-2xl aspect-video bg-white/20 animate-pulse lg:hidden"></div>
</div>
</div>
<div :if={@results} class="flex-1 space-y-8">
<div
:for={%{video: video, segments: segments} <- @results}
class="flex flex-col lg:flex-row gap-8"
>
<.link navigate={video_url(video, Enum.at(segments, 0))} class="w-full shrink-0 lg:shrink">
<.video_thumbnail video={video} class="w-full rounded-2xl" />
</.link>
<div>
<div>
<.link
navigate={video_url(video, Enum.at(segments, 0))}
class="text-lg font-bold line-clamp-2"
>
<%= video.title %>
</.link>
<p class="text-sm text-gray-300"><%= Timex.from_now(video.inserted_at) %></p>
<.link navigate={"/#{video.channel_handle}"} class="mt-2 flex items-center gap-2">
<span class="relative flex items-center h-8 w-8 shrink-0 overflow-hidden rounded-full">
<img
class="aspect-square h-full w-full"
alt={video.channel_name}
src={video.channel_avatar_url}
/>
</span>
<span class="text-sm text-gray-300"><%= video.channel_name %></span>
</.link>
</div>
<div class="mt-4 relative">
<div class="w-full h-full pointer-events-none absolute bg-gradient-to-r from-transparent from-[75%] to-gray-900 rounded-xl">
</div>
<div class="bg-white/[7.5%] border border-white/[20%] p-4 rounded-xl flex gap-8 w-[calc(100vw-2rem)] md:hidden lg:flex lg:w-[22rem] xl:w-[40rem] overflow-x-auto pb-4 -mb-4 scrollbar-thin">
<.link
:for={segment <- segments}
class="space-x-2"
navigate={video_url(video, segment)}
>
<div class="w-[66vw] lg:w-[20rem] xl:w-[28rem]">
<p class="text-base font-semibold text-green-400">
<%= Library.to_hhmmss(segment.start) %>
</p>
<p class="mt-2 text-sm">
<span
:for={word <- segment.body |> String.split(~r/\s/)}
class={[matches_query?(@query_words, word) && "text-green-300 font-medium"]}
>
<%= word %>
</span>
</p>
</div>
</.link>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
"""
end
# Path to the video's page; with a segment, a `?t=` parameter holding the
# segment's start time truncated to whole seconds is appended.
defp video_url(video, nil), do: "/#{video.channel_handle}/#{video.id}"

defp video_url(video, segment) do
  offset = trunc(segment.start)
  "/#{video.channel_handle}/#{video.id}?t=#{offset}"
end
# Nothing to set up on mount; all assigns are set in handle_params/3.
@impl true
def mount(_params, _session, socket), do: {:ok, socket}
# Delegates URL parameter handling to apply_action/3 for the current live action.
@impl true
def handle_params(params, _url, socket) do
  action = socket.assigns.live_action
  {:noreply, apply_action(socket, action, params)}
end
# A search is expressed as a URL change: patch to /cossgpt with the query
# as a parameter and let handle_params/3 do the work.
@impl true
def handle_event("search", %{"query" => query}, socket) do
  target = ~p"/cossgpt?#{%{query: query}}"
  {:noreply, push_patch(socket, to: target)}
end
# Receives the reply of the current search task and stores it as the
# results; every other message (including replies of replaced tasks) is
# ignored.
@impl true
def handle_info({ref, result}, socket) when socket.assigns.task.ref == ref do
  socket = assign(socket, task: nil, results: result)
  {:noreply, socket}
end

def handle_info(_msg, socket), do: {:noreply, socket}
# Sets up the assigns for the :index action from the URL params.
#
# With a missing or empty "query" param all search state is reset to nil.
# Otherwise the query is stored, split on whitespace into normalized words, and
# an async task running fetch_results/1 is started; `:results` stays nil until
# the task replies. Page metadata assigns are set in both cases.
defp apply_action(socket, :index, params) do
  search_assigns =
    case params["query"] || "" do
      "" ->
        [query: nil, query_words: nil, task: nil, results: nil]

      query ->
        words = query |> String.split(~r/\s/) |> Enum.map(&normalize_word/1)
        search_task = Task.async(fn -> fetch_results(query) end)

        [query: query, query_words: words, task: search_task, results: nil]
    end

  page_assigns = [
    page_title: "COSSgpt",
    page_description: "Learn how to build a commercial open source software company",
    page_url: "https://tv.algora.io/cossgpt",
    page_image: "#{AlgoraWeb.Endpoint.url()}/images/og/cossgpt.png"
  ]

  socket
  |> assign(search_assigns)
  |> assign(page_assigns)
end
# Runs the search for `query` and returns a list of
# `%{video: video, segments: segments}` maps, one per video returned by
# `Library.list_videos_by_ids/1`, each carrying the relevant segments that
# belong to that video (in the order `ML.get_relevant_chunks/2` returned them).
#
# The query embedding is obtained through `Cache.fetch/2`, keyed by the
# slugified query, and is expected to be a single-element list holding a map
# with an "embedding" key (the match raises otherwise).
#
# NOTE(review): the cache key is derived from `Slug.slugify/1`, so distinct
# queries that slugify to the same value would share one cache entry - confirm
# that this is acceptable.
defp fetch_results(query) do
  [%{"embedding" => embedding}] =
    Cache.fetch("embeddings/#{Slug.slugify(query)}", fn ->
      ML.create_embedding(query)
    end)

  segments = ML.load_index!() |> ML.get_relevant_chunks(embedding)

  # Group once up front instead of re-filtering the full segment list per video.
  segments_by_video =
    Enum.group_by(segments, fn %Library.Segment{video_id: video_id} -> video_id end)

  segments
  |> Enum.map(& &1.video_id)
  |> Enum.uniq()
  |> Library.list_videos_by_ids()
  |> Enum.map(fn video ->
    %{video: video, segments: Map.get(segments_by_video, video.id, [])}
  end)
end
# Normalizes a word for comparison: removes every character that is not an
# ASCII letter or digit, then lowercases what remains.
defp normalize_word(s) do
  alphanumeric = String.replace(s, ~r/[^A-Za-z0-9]/, "")
  String.downcase(alphanumeric)
end
# Reports whether the transcript word `s` should be highlighted for the query.
#
# `s` is normalized with normalize_word/1 and matches when it and some query
# word are both at least 3 characters long, one contains the other, and neither
# is a common word according to `Util.is_common_word/1`.
#
# `query_words` is expected to hold already-normalized words; `nil` (assigned
# when there is no active query) is treated as an empty list.
defp matches_query?(query_words, s) do
  # The checks on the transcript word do not depend on the query word, so they
  # are evaluated once instead of once per query word.
  word = normalize_word(s)

  if String.length(word) >= 3 and !Util.is_common_word(word) do
    query_words
    |> List.wrap()
    |> Enum.any?(fn query_word ->
      String.length(query_word) >= 3 and
        (String.contains?(word, query_word) or String.contains?(query_word, word)) and
        !Util.is_common_word(query_word)
    end)
  else
    false
  end
end
end

View File

@ -0,0 +1,275 @@
defmodule AlgoraWeb.COSSGPTOGLive do
use AlgoraWeb, :live_view
alias Algora.{Library, ML, Cache, Util}
# Renders the COSSgpt page for this LiveView as a two-column flex layout.
#
# Left column:
#   * the "COSSgpt" heading flanked by two sparkle icons,
#   * a search form that submits the "search" event, with its input pre-filled
#     from @query,
#   * three hard-coded suggestion buttons that fire the same "search" event and
#     pass their text via `phx-value-query`.
#
# Right column:
#   * when @results is set, one entry per `%{video: video, segments: segments}`
#     result: a thumbnail and title linking to the video at its first segment,
#     the relative insertion time, a link to the channel, and a horizontally
#     scrollable list of segments. Each segment links to its own timestamp and
#     shows its body word by word, highlighting words for which
#     matches_query?/2 returns true against @query_words.
#   * when @task is set, pulsing placeholder boxes are shown.
@impl true
def render(assigns) do
~H"""
<div class="px-4 py-8 text-white mx-auto overflow-hidden flex gap-[20rem]">
<div class="ml-[18rem] h-[calc(100vh-88px)] flex flex-col items-center justify-center scale-150">
<h1 class="flex items-center gap-4 text-8xl font-bold font-mono text-green-300 [text-shadow:#000_10px_5px_10px]">
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-20 w-20 text-green-300"
>
<path stroke="none" d="M0 0h24v24H0z" fill="none" /><path d="M16 18a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm0 -12a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm-7 12a6 6 0 0 1 6 -6a6 6 0 0 1 -6 -6a6 6 0 0 1 -6 6a6 6 0 0 1 6 6z" />
</svg>
<span class="text-green-300">COSS</span><span class="text-green-400">gpt</span>
<svg
xmlns="http://www.w3.org/2000/svg"
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
class="h-20 w-20 text-green-400"
>
<path stroke="none" d="M0 0h24v24H0z" fill="none" /><path d="M16 18a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm0 -12a2 2 0 0 1 2 2a2 2 0 0 1 2 -2a2 2 0 0 1 -2 -2a2 2 0 0 1 -2 2zm-7 12a6 6 0 0 1 6 -6a6 6 0 0 1 -6 -6a6 6 0 0 1 -6 6a6 6 0 0 1 6 6z" />
</svg>
</h1>
<form class="mt-10 w-full max-w-lg mx-auto scale-[1.3]" phx-submit="search">
<label for="query" class="mb-2 text-sm font-medium sr-only text-white">
Search
</label>
<div class="relative">
<div class="absolute inset-y-0 start-0 flex items-center ps-4 pointer-events-none">
<svg
class="w-8 h-8 text-green-200"
aria-hidden="true"
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 20 20"
>
<path
stroke="currentColor"
stroke-linecap="round"
stroke-linejoin="round"
stroke-width="2"
d="m19 19-4-4m0-7A7 7 0 1 1 1 8a7 7 0 0 1 14 0Z"
/>
</svg>
</div>
<input
type="search"
id="query"
name="query"
value={@query}
autocomplete="off"
class="block w-full p-4 ps-14 border rounded-lg border-green-400 bg-white/[5%] placeholder-gray-400 text-green-100 ring-4 ring-green-400 focus:outline-none text-2xl font-medium"
placeholder="Anything about commercial open-source software..."
required
/>
<button
type="submit"
class="text-green-950 text-2xl absolute end-2.5 bottom-2.5 focus:ring-4 focus:outline-none font-bold rounded-lg px-4 py-2 bg-green-200 hover:bg-green-700 focus:ring-green-800"
>
Learn
</button>
</div>
</form>
<div class="mt-20 scale-[1.37]">
<div class="uppercase text-center text-gray-300 tracking-tight text-xs font-semibold">
Suggestions
</div>
<div class="mt-2 flex flex-wrap gap-4 justify-center max-w-2xl mx-auto">
<button
:for={
suggestion <- [
"Business models and pricing",
"Choosing a license",
"How to hire engineers"
]
}
phx-click="search"
phx-value-query={suggestion}
class="bg-white/10 text-gray-200 font-medium text-sm px-3 py-2 ring-1 ring-white/20 shadow-inner inline-flex rounded-lg hover:ring-white/25 hover:bg-white/5 hover:text-white transition-colors"
>
<%= suggestion %>
</button>
</div>
</div>
</div>
<div class="flex">
<div :if={@results} class="flex-1 space-y-8">
<div
:for={%{video: video, segments: segments} <- @results}
class="flex flex-col lg:flex-row gap-8"
>
<.link
navigate={video_url(video, Enum.at(segments, 0))}
class="w-full shrink-0 max-w-[40rem]"
>
<.video_thumbnail video={video} class="w-full rounded-2xl" />
</.link>
<div>
<div>
<.link
navigate={video_url(video, Enum.at(segments, 0))}
class="text-lg font-bold line-clamp-2"
>
<%= video.title %>
</.link>
<p class="text-sm text-gray-300"><%= Timex.from_now(video.inserted_at) %></p>
<.link navigate={"/#{video.channel_handle}"} class="mt-2 flex items-center gap-2">
<span class="relative flex items-center h-8 w-8 shrink-0 overflow-hidden rounded-full">
<img
class="aspect-square h-full w-full"
alt={video.channel_name}
src={video.channel_avatar_url}
/>
</span>
<span class="text-sm text-gray-300"><%= video.channel_name %></span>
</.link>
</div>
<div class="mt-4 relative">
<div class="w-full h-full pointer-events-none absolute bg-gradient-to-r from-transparent from-[75%] to-gray-900 rounded-xl">
</div>
<div class="bg-white/[7.5%] border border-white/[20%] p-4 rounded-xl flex gap-8 w-[calc(100vw-2rem)] lg:w-[22rem] xl:w-[40rem] overflow-x-auto pb-4 -mb-4 scrollbar-thin">
<.link
:for={segment <- segments}
class="space-x-2"
navigate={video_url(video, segment)}
>
<div class="w-[66vw] lg:w-[20rem] xl:w-[28rem]">
<p class="text-base font-semibold text-green-400">
<%= Library.to_hhmmss(segment.start) %>
</p>
<p class="mt-2 text-sm">
<span
:for={word <- segment.body |> String.split(~r/\s/)}
class={[matches_query?(@query_words, word) && "text-green-300 font-medium"]}
>
<%= word %>
</span>
</p>
</div>
</.link>
</div>
</div>
</div>
</div>
</div>
<div :if={@task} class="flex-1 space-y-8">
<div :for={_ <- 1..2} class="flex gap-8">
<div class="w-1/2 rounded-2xl aspect-video bg-white/20 animate-pulse"></div>
<div class="w-1/2 rounded-2xl aspect-video bg-white/20 animate-pulse"></div>
</div>
</div>
</div>
</div>
"""
end
# Builds the in-app path to `video` ("/<channel_handle>/<id>"). When a segment is
# given, its start time (truncated to an integer) is appended as the `t` query
# parameter; a `nil` segment yields the bare video path.
defp video_url(video, segment) do
  base_path = "/#{video.channel_handle}/#{video.id}"

  if is_nil(segment) do
    base_path
  else
    base_path <> "?t=#{trunc(segment.start)}"
  end
end
# Nothing is assigned at mount time; the search state is set up later by
# handle_params/3.
@impl true
def mount(_params, _session, socket), do: {:ok, socket}
# Delegates URL parameter handling to apply_action/3, dispatching on the
# socket's current live action.
@impl true
def handle_params(params, _url, socket) do
  action = socket.assigns.live_action
  {:noreply, apply_action(socket, action, params)}
end
# Handles the "search" event by patching the URL so that the query lives in the
# query string; the patch is then processed by handle_params/3.
@impl true
def handle_event("search", %{"query" => query}, socket) do
  destination = ~p"/og/cossgpt?#{%{query: query}}"
  {:noreply, push_patch(socket, to: destination)}
end
# Receives the reply of the search task started in apply_action/3.
#
# Only a reply whose reference matches the task currently stored in the assigns
# is accepted; it clears `:task` and stores the reply in `:results`. Every other
# message (including replies from superseded tasks) is ignored.
@impl true
def handle_info({ref, result}, socket) when socket.assigns.task.ref == ref do
  # The task has already replied, so its monitor is no longer needed. Flushing
  # also discards the :DOWN message that would otherwise be delivered afterwards.
  Process.demonitor(ref, [:flush])

  {:noreply, assign(socket, task: nil, results: result)}
end

def handle_info(_, socket) do
  {:noreply, socket}
end
# Sets up the assigns for the :index action from the URL params.
#
# With a missing or empty "query" param all search state is reset to nil.
# Otherwise the query is stored, split on whitespace into normalized words, and
# an async task running fetch_results/1 is started; `:results` stays nil until
# the task replies. The page title is set in both cases.
defp apply_action(socket, :index, params) do
  search_assigns =
    case params["query"] || "" do
      "" ->
        [query: nil, query_words: nil, task: nil, results: nil]

      query ->
        words = query |> String.split(~r/\s/) |> Enum.map(&normalize_word/1)
        search_task = Task.async(fn -> fetch_results(query) end)

        [query: query, query_words: words, task: search_task, results: nil]
    end

  socket
  |> assign(search_assigns)
  |> assign(:page_title, "COSSgpt")
end
# Runs the search for `query` and returns a list of
# `%{video: video, segments: segments}` maps, one per video returned by
# `Library.list_videos_by_ids/1`, each carrying the relevant segments that
# belong to that video (in the order `ML.get_relevant_chunks/2` returned them).
#
# The query embedding is obtained through `Cache.fetch/2`, keyed by the
# slugified query, and is expected to be a single-element list holding a map
# with an "embedding" key (the match raises otherwise).
#
# NOTE(review): the cache key is derived from `Slug.slugify/1`, so distinct
# queries that slugify to the same value would share one cache entry - confirm
# that this is acceptable.
defp fetch_results(query) do
  [%{"embedding" => embedding}] =
    Cache.fetch("embeddings/#{Slug.slugify(query)}", fn ->
      ML.create_embedding(query)
    end)

  segments = ML.load_index!() |> ML.get_relevant_chunks(embedding)

  # Group once up front instead of re-filtering the full segment list per video.
  segments_by_video =
    Enum.group_by(segments, fn %Library.Segment{video_id: video_id} -> video_id end)

  segments
  |> Enum.map(& &1.video_id)
  |> Enum.uniq()
  |> Library.list_videos_by_ids()
  |> Enum.map(fn video ->
    %{video: video, segments: Map.get(segments_by_video, video.id, [])}
  end)
end
# Normalizes a word for comparison: removes every character that is not an
# ASCII letter or digit, then lowercases what remains.
defp normalize_word(s) do
  alphanumeric = String.replace(s, ~r/[^A-Za-z0-9]/, "")
  String.downcase(alphanumeric)
end
# Reports whether the transcript word `s` should be highlighted for the query.
#
# `s` is normalized with normalize_word/1 and matches when it and some query
# word are both at least 3 characters long, one contains the other, and neither
# is a common word according to `Util.is_common_word/1`.
#
# `query_words` is expected to hold already-normalized words; `nil` (assigned
# when there is no active query) is treated as an empty list.
defp matches_query?(query_words, s) do
  # The checks on the transcript word do not depend on the query word, so they
  # are evaluated once instead of once per query word.
  word = normalize_word(s)

  if String.length(word) >= 3 and !Util.is_common_word(word) do
    query_words
    |> List.wrap()
    |> Enum.any?(fn query_word ->
      String.length(query_word) >= 3 and
        (String.contains?(word, query_word) or String.contains?(query_word, word)) and
        !Util.is_common_word(query_word)
    end)
  else
    false
  end
end
end

View File

@ -10,7 +10,7 @@ defmodule AlgoraWeb.PlayerLive do
<video
id="video-player"
phx-hook="VideoPlayer"
class="video-js vjs-default-skin min-w-xl max-w-2xl aspect-video vjs-fluid h-full w-full flex-1 rounded-2xl overflow-hidden"
class="video-js vjs-default-skin aspect-video h-full w-full flex-1 rounded-2xl overflow-hidden"
controls
/>
</div>

View File

@ -10,7 +10,7 @@ defmodule AlgoraWeb.SignInLive do
</h2>
<a
href={Algora.Github.authorize_url()}
class="mt-8 w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-purple-600 hover:bg-purple-500 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-purple-400"
class="mt-8 w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-purple-600 hover:bg-purple-600 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-purple-400"
>
Sign in with GitHub
</a>

View File

@ -252,21 +252,31 @@ defmodule AlgoraWeb.StudioLive do
{:noreply, redirect(socket, external: mp4_video.url)}
else
video = Library.get_video!(id)
send(self(), {Library, %Library.Events.ProcessingQueued{video: video}})
%{video_id: id}
|> Workers.MP4Transmuxer.new()
|> Oban.insert()
send(self(), {Library, %Library.Events.ProcessingQueued{video: video}})
{:noreply, socket}
end
end
# Handles the "transcribe_video" event: loads the video, inserts a
# `Workers.Transcriber` job for it via Oban, and sends this process a
# `ProcessingQueued` event for the video.
def handle_event("transcribe_video", %{"id" => id}, socket) do
  video = Library.get_video!(id)

  transcription_job = Workers.Transcriber.new(%{video_id: id})
  Oban.insert(transcription_job)

  queued_event = %Library.Events.ProcessingQueued{video: video}
  send(self(), {Library, queued_event})

  {:noreply, socket}
end
def handle_event("upload_videos", _params, socket) do
_videos =
consume_uploaded_entries(socket, :video, fn %{path: path}, entry ->
video = Library.init_mp4!(entry, path, socket.assigns.current_user)
send(self(), {Library, %Library.Events.ProcessingQueued{video: video}})
# TODO: add to oban queue instead
# ensure that the worker runs in the same machine where the upload is consumed
@ -275,6 +285,7 @@ defmodule AlgoraWeb.StudioLive do
# |> Oban.insert()
Library.transmux_to_hls(video, fn _ -> nil end)
send(self(), {Library, %Library.Events.ProcessingQueued{video: video}})
{:ok, video}
end)

View File

@ -346,7 +346,11 @@ defmodule AlgoraWeb.VideoLive do
"""
end
def mount(%{"channel_handle" => channel_handle, "video_id" => video_id}, _session, socket) do
def mount(
%{"channel_handle" => channel_handle, "video_id" => video_id} = params,
_session,
socket
) do
%{current_user: current_user} = socket.assigns
channel =
@ -378,11 +382,10 @@ defmodule AlgoraWeb.VideoLive do
|> Jason.encode(pretty: true)
types = %{subtitles: :string}
params = %{subtitles: encoded_subtitles}
changeset =
{data, types}
|> Ecto.Changeset.cast(params, Map.keys(types))
|> Ecto.Changeset.cast(%{subtitles: encoded_subtitles}, Map.keys(types))
socket =
socket
@ -398,7 +401,7 @@ defmodule AlgoraWeb.VideoLive do
|> stream(:videos, videos)
|> stream(:presences, Presence.list_online_users(channel_handle))
if connected?(socket), do: send(self(), {:play, video})
if connected?(socket), do: send(self(), {:play, {video, params["t"]}})
{:ok, socket}
end
@ -408,11 +411,17 @@ defmodule AlgoraWeb.VideoLive do
{:noreply, socket |> apply_action(socket.assigns.live_action, params)}
end
def handle_info({:play, video}, socket) do
def handle_info({:play, {video, t}}, socket) do
socket =
socket
|> push_event("play_video", %{
detail: %{player: %{src: video.url, type: Library.player_type(video)}}
detail: %{
player: %{
src: video.url,
type: Library.player_type(video),
currentTime: t
}
}
})
|> push_event("join_chat", %{id: video.id})

View File

@ -82,6 +82,8 @@ defmodule AlgoraWeb.Router do
live_session :default, on_mount: [{AlgoraWeb.UserAuth, :current_user}, AlgoraWeb.Nav] do
live "/", HomeLive, :show
live "/auth/login", SignInLive, :index
live "/cossgpt", COSSGPTLive, :index
live "/og/cossgpt", COSSGPTOGLive, :index
live "/:channel_handle", ChannelLive, :show
live "/:channel_handle/:video_id", VideoLive, :show
end

View File

@ -32,14 +32,17 @@ defmodule Algora.MixProject do
# Type `mix help deps` for examples and options.
defp deps do
[
{:bumblebee, "~> 0.5.3"},
{:castore, "~> 0.1.13"},
{:credo, "~> 1.7", only: [:dev, :test], runtime: false},
{:dialyxir, "~> 1.3", only: [:dev], runtime: false},
{:dns_cluster, "~> 0.1.1"},
{:ecto_network, "~> 1.3.0"},
{:ecto_sql, "~> 3.6"},
{:elixir_make, "~> 0.7.0", runtime: false},
{:esbuild, "~> 0.8", runtime: Mix.env() == :dev},
{:ex_m3u8, "~> 0.9.0"},
{:exla, ">= 0.0.0"},
{:exsync, "~> 0.2", only: :dev},
{:ffmpex, "~> 0.10.0"},
{:finch, "~> 0.13"},
@ -47,6 +50,7 @@ defmodule Algora.MixProject do
{:fly_postgres, "~> 0.3.0"},
{:gettext, "~> 0.18"},
{:heroicons, "~> 0.5.0"},
{:hnswlib, "~> 0.1.0"},
{:jason, "~> 1.2"},
{:libcluster, "~> 3.3.1"},
{:membrane_core, "~> 1.0"},
@ -55,13 +59,14 @@ defmodule Algora.MixProject do
{:mint, "~> 1.0"},
{:oban, "~> 2.16"},
{:phoenix_ecto, "~> 4.4"},
{:phoenix_html, "~> 4.0", override: true},
{:phoenix_html_helpers, "~> 1.0"},
{:phoenix_html, "~> 4.0", override: true},
{:phoenix_live_dashboard, "~> 0.8.3"},
{:phoenix_live_reload, "~> 1.2", only: :dev},
{:phoenix_live_view, "~> 0.20.2"},
{:phoenix, "~> 1.7.11"},
{:plug_cowboy, "~> 2.5"},
{:replicate, "~> 1.2.0"},
{:slugify, "~> 1.3"},
{:swoosh, "~> 1.3"},
{:tailwind, "~> 0.2", runtime: Mix.env() == :dev},

View File

@ -1,13 +1,17 @@
%{
"axon": {:hex, :axon, "0.6.1", "1d042fdba1c1b4413a3d65800524feebd1bc8ed218f8cdefe7a97510c3f427f3", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.6.0 or ~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "d6b0ae2f0dd284f6bf702edcab71e790d6c01ca502dd06c4070836554f5a48e1"},
"bimap": {:hex, :bimap, "1.3.0", "3ea4832e58dc83a9b5b407c6731e7bae87458aa618e6d11d8e12114a17afa4b3", [:mix], [], "hexpm", "bf5a2b078528465aa705f405a5c638becd63e41d280ada41e0f77e6d255a10b4"},
"bumblebee": {:hex, :bumblebee, "0.5.3", "151c215fd6014958dbfc322fe5f31b44d170293f69cfdca419936c81e39b1f64", [:mix], [{:axon, "~> 0.6.1", [hex: :axon, repo: "hexpm", optional: false]}, {:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:nx_image, "~> 0.1.0", [hex: :nx_image, repo: "hexpm", optional: false]}, {:nx_signal, "~> 0.2.0", [hex: :nx_signal, repo: "hexpm", optional: false]}, {:progress_bar, "~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: false]}, {:safetensors, "~> 0.1.3", [hex: :safetensors, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.4", [hex: :tokenizers, repo: "hexpm", optional: false]}, {:unpickler, "~> 0.1.0", [hex: :unpickler, repo: "hexpm", optional: false]}, {:unzip, "~> 0.10.0", [hex: :unzip, repo: "hexpm", optional: false]}], "hexpm", "5518f11e424c431a9cbedc80e0d26525368f0b6e50572a674ff247ec3b26bdd7"},
"bunch": {:hex, :bunch, "1.6.1", "5393d827a64d5f846092703441ea50e65bc09f37fd8e320878f13e63d410aec7", [:mix], [], "hexpm", "286cc3add551628b30605efbe2fca4e38cc1bea89bcd0a1a7226920b3364fe4a"},
"bunch_native": {:hex, :bunch_native, "0.5.0", "8ac1536789a597599c10b652e0b526d8833348c19e4739a0759a2bedfd924e63", [:mix], [{:bundlex, "~> 1.0", [hex: :bundlex, repo: "hexpm", optional: false]}], "hexpm", "24190c760e32b23b36edeb2dc4852515c7c5b3b8675b1a864e0715bdd1c8f80d"},
"bundlex": {:hex, :bundlex, "1.4.5", "ea06cb441af636baaf5232dced24c6b1ee5ccbe7a7cad8a348eb3100fa1d7b52", [:mix], [{:bunch, "~> 1.0", [hex: :bunch, repo: "hexpm", optional: false]}, {:elixir_uuid, "~> 1.2", [hex: :elixir_uuid, repo: "hexpm", optional: false]}, {:qex, "~> 0.5", [hex: :qex, repo: "hexpm", optional: false]}, {:req, "~> 0.4.0", [hex: :req, repo: "hexpm", optional: false]}, {:zarex, "~> 1.0", [hex: :zarex, repo: "hexpm", optional: false]}], "hexpm", "bd4136100d3120740bf8eaa73ad74859d5ccd659cf0b27aa1645590a67a0172b"},
"bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
"castore": {:hex, :castore, "0.1.22", "4127549e411bedd012ca3a308dede574f43819fe9394254ca55ab4895abfa1a2", [:mix], [], "hexpm", "c17576df47eb5aa1ee40cc4134316a99f5cad3e215d5c77b8dd3cfef12a22cac"},
"cc_precompiler": {:hex, :cc_precompiler, "0.1.10", "47c9c08d8869cf09b41da36538f62bc1abd3e19e41701c2cea2675b53c704258", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "f6e046254e53cd6b41c6bacd70ae728011aa82b2742a80d6e2214855c6e06b22"},
"certifi": {:hex, :certifi, "2.12.0", "2d1cca2ec95f59643862af91f001478c9863c2ac9cb6e2f89780bfd8de987329", [:rebar3], [], "hexpm", "ee68d85df22e554040cdb4be100f33873ac6051387baf6a8f6ce82272340ff1c"},
"coerce": {:hex, :coerce, "1.0.1", "211c27386315dc2894ac11bc1f413a0e38505d808153367bd5c6e75a4003d096", [:mix], [], "hexpm", "b44a691700f7a1a15b4b7e2ff1fa30bebd669929ac8aa43cffe9e2f8bf051cf1"},
"combine": {:hex, :combine, "0.10.0", "eff8224eeb56498a2af13011d142c5e7997a80c8f5b97c499f84c841032e429f", [:mix], [], "hexpm", "1b1dbc1790073076580d0d1d64e42eae2366583e7aecd455d1215b0d16f2451b"},
"complex": {:hex, :complex, "0.5.0", "af2d2331ff6170b61bb738695e481b27a66780e18763e066ee2cd863d0b1dd92", [:mix], [], "hexpm", "2683bd3c184466cfb94fad74cbfddfaa94b860e27ad4ca1bffe3bff169d91ef1"},
"connection": {:hex, :connection, "1.1.0", "ff2a49c4b75b6fb3e674bfc5536451607270aac754ffd1bdfe175abe4a6d7a68", [:mix], [], "hexpm", "722c1eb0a418fbe91ba7bd59a47e28008a189d47e37e0e7bb85585a016b2869c"},
"cowboy": {:hex, :cowboy, "2.10.0", "ff9ffeff91dae4ae270dd975642997afe2a1179d94b1887863e43f681a203e26", [:make, :rebar3], [{:cowlib, "2.12.1", [hex: :cowlib, repo: "hexpm", optional: false]}, {:ranch, "1.8.0", [hex: :ranch, repo: "hexpm", optional: false]}], "hexpm", "3afdccb7183cc6f143cb14d3cf51fa00e53db9ec80cdcd525482f5e99bc41d6b"},
"cowboy_telemetry": {:hex, :cowboy_telemetry, "0.4.0", "f239f68b588efa7707abce16a84d0d2acf3a0f50571f8bb7f56a15865aae820c", [:rebar3], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7d98bac1ee4565d31b62d59f8823dfd8356a169e7fcbb83831b8a5397404c9de"},
@ -21,6 +25,7 @@
"ecto": {:hex, :ecto, "3.9.6", "2f420c173efcb2e22fa4f8fc41e75e02b3c5bd4cffef12085cae5418c12e530d", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df17bc06ba6f78a7b764e4a14ef877fe5f4499332c5a105ace11fe7013b72c84"},
"ecto_network": {:hex, :ecto_network, "1.3.0", "1e77fa37c20e0f6a426d3862732f3317b0fa4c18f123d325f81752a491d7304e", [:mix], [{:ecto_sql, ">= 3.0.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:phoenix_html, ">= 0.0.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.14.0", [hex: :postgrex, repo: "hexpm", optional: false]}], "hexpm", "053a5e46ef2837e8ea5ea97c82fa0f5494699209eddd764e663c85f11b2865bd"},
"ecto_sql": {:hex, :ecto_sql, "3.9.0", "2bb21210a2a13317e098a420a8c1cc58b0c3421ab8e3acfa96417dab7817918c", [:mix], [{:db_connection, "~> 2.5 or ~> 2.4.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.9.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.6.0", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.16.0 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a8f3f720073b8b1ac4c978be25fa7960ed7fd44997420c304a4a2e200b596453"},
"elixir_make": {:hex, :elixir_make, "0.7.8", "505026f266552ee5aabca0b9f9c229cbb496c689537c9f922f3eb5431157efc7", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:certifi, "~> 2.0", [hex: :certifi, repo: "hexpm", optional: true]}], "hexpm", "7a71945b913d37ea89b06966e1342c85cfe549b15e6d6d081e8081c493062c07"},
"elixir_uuid": {:hex, :elixir_uuid, "1.2.1", "dce506597acb7e6b0daeaff52ff6a9043f5919a4c3315abb4143f0b00378c097", [:mix], [], "hexpm", "f7eba2ea6c3555cea09706492716b0d87397b88946e6380898c2889d68585752"},
"erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
"esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"},
@ -28,6 +33,7 @@
"ex_aws_s3": {:hex, :ex_aws_s3, "2.5.3", "422468e5c3e1a4da5298e66c3468b465cfd354b842e512cb1f6fbbe4e2f5bdaf", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm", "4f09dd372cc386550e484808c5ac5027766c8d0cd8271ccc578b82ee6ef4f3b8"},
"ex_doc": {:hex, :ex_doc, "0.29.4", "6257ecbb20c7396b1fe5accd55b7b0d23f44b6aa18017b415cb4c2b91d997729", [:mix], [{:earmark_parser, "~> 1.4.31", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "2c6699a737ae46cb61e4ed012af931b57b699643b24dabe2400a8168414bc4f5"},
"ex_m3u8": {:hex, :ex_m3u8, "0.9.0", "54a12463320236aab09402bc69676f665e692636235a2b186a22df507ebc5643", [:mix], [{:nimble_parsec, "~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}, {:typed_struct, "~> 0.3.0", [hex: :typed_struct, repo: "hexpm", optional: false]}], "hexpm", "d57939a90d8da5956264d27a516c5e2ac80b09c8adbe4e3199d7d14c79549b5c"},
"exla": {:hex, :exla, "0.7.0", "27fac40a580f0d3816fe3bf35c50dfc2f99597d26ac7e2aca4a3c62b89bb427f", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nx, "~> 0.7.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.6.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "d3bfc622deb52cec95efc9d76063891afc7cd33e38eddbb01f3385c53e043c40"},
"expo": {:hex, :expo, "0.5.2", "beba786aab8e3c5431813d7a44b828e7b922bfa431d6bfbada0904535342efe2", [:mix], [], "hexpm", "8c9bfa06ca017c9cb4020fabe980bc7fdb1aaec059fd004c2ab3bff03b1c599c"},
"exsync": {:hex, :exsync, "0.3.0", "39ab8b3d4e5fe779a34ad930135145283ebf56069513dfdfaad4e30a04b158c7", [:mix], [{:file_system, "~> 0.2", [hex: :file_system, repo: "hexpm", optional: false]}], "hexpm", "2030d085a14fa5f685d53d97171a21345dddaf2b67a0927263efc6b2cd2bb09f"},
"ffmpex": {:hex, :ffmpex, "0.10.0", "ce29281eac60bf109c05acb4342eecf813a3cd3f08c1bce350423caad86128af", [:mix], [{:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}, {:rambo, "~> 0.3.0", [hex: :rambo, repo: "hexpm", optional: false]}], "hexpm", "de8d81f8c51cc258dcee9a3e0b1568b0659c97be004557d9af47795206cff53b"},
@ -39,7 +45,9 @@
"gettext": {:hex, :gettext, "0.24.0", "6f4d90ac5f3111673cbefc4ebee96fe5f37a114861ab8c7b7d5b30a1108ce6d8", [:mix], [{:expo, "~> 0.5.1", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "bdf75cdfcbe9e4622dd18e034b227d77dd17f0f133853a1c73b97b3d6c770e8b"},
"hackney": {:hex, :hackney, "1.20.1", "8d97aec62ddddd757d128bfd1df6c5861093419f8f7a4223823537bad5d064e2", [:rebar3], [{:certifi, "~> 2.12.0", [hex: :certifi, repo: "hexpm", optional: false]}, {:idna, "~> 6.1.0", [hex: :idna, repo: "hexpm", optional: false]}, {:metrics, "~> 1.0.0", [hex: :metrics, repo: "hexpm", optional: false]}, {:mimerl, "~> 1.1", [hex: :mimerl, repo: "hexpm", optional: false]}, {:parse_trans, "3.4.1", [hex: :parse_trans, repo: "hexpm", optional: false]}, {:ssl_verify_fun, "~> 1.1.0", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}, {:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "fe9094e5f1a2a2c0a7d10918fee36bfec0ec2a979994cff8cfe8058cd9af38e3"},
"heroicons": {:hex, :heroicons, "0.5.3", "ee8ae8335303df3b18f2cc07f46e1cb6e761ba4cf2c901623fbe9a28c0bc51dd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:phoenix_live_view, ">= 0.18.2", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}], "hexpm", "a210037e8a09ac17e2a0a0779d729e89c821c944434c3baa7edfc1f5b32f3502"},
"hnswlib": {:hex, :hnswlib, "0.1.5", "750bea8627ea60dfdea67421aef34478c31bf1254495ebd43a6a100aaf0523c0", [:make, :mix], [{:cc_precompiler, "~> 0.1.0", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.7.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "833a3dcfd917236a4e3dd1917725c32d68f65cbf37888cb2af8d38a4e547f8f3"},
"hpax": {:hex, :hpax, "0.1.2", "09a75600d9d8bbd064cdd741f21fc06fc1f4cf3d0fcc335e5aa19be1a7235c84", [:mix], [], "hexpm", "2c87843d5a23f5f16748ebe77969880e29809580efdaccd615cd3bed628a8c13"},
"httpoison": {:hex, :httpoison, "2.2.1", "87b7ed6d95db0389f7df02779644171d7319d319178f6680438167d7b69b1f3d", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "51364e6d2f429d80e14fe4b5f8e39719cacd03eb3f9a9286e61e216feac2d2df"},
"idna": {:hex, :idna, "6.1.1", "8a63070e9f7d0c62eb9d9fcb360a7de382448200fbbd1b106cc96d3d8099df8d", [:rebar3], [{:unicode_util_compat, "~> 0.7.0", [hex: :unicode_util_compat, repo: "hexpm", optional: false]}], "hexpm", "92376eb7894412ed19ac475e4a86f7b413c1b9fbb5bd16dccd57934157944cea"},
"jason": {:hex, :jason, "1.4.1", "af1504e35f629ddcdd6addb3513c3853991f694921b1b9368b0bd32beb9f1b63", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "fbb01ecdfd565b56261302f7e1fcc27c4fb8f32d56eab74db621fc154604a7a1"},
"libcluster": {:hex, :libcluster, "3.3.1", "e7a4875cd1290cee7a693d6bd46076863e9e433708b01339783de6eff5b7f0aa", [:mix], [{:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "b575ca63c1cd84e01f3fa0fc45e6eb945c1ee7ae8d441d33def999075e9e5398"},
@ -76,6 +84,9 @@
"nimble_parsec": {:hex, :nimble_parsec, "1.4.0", "51f9b613ea62cfa97b25ccc2c1b4216e81df970acd8e16e8d1bdc58fef21370d", [:mix], [], "hexpm", "9c565862810fb383e9838c1dd2d7d2c437b3d13b267414ba6af33e50d2d1cf28"},
"nimble_pool": {:hex, :nimble_pool, "0.2.6", "91f2f4c357da4c4a0a548286c84a3a28004f68f05609b4534526871a22053cde", [:mix], [], "hexpm", "1c715055095d3f2705c4e236c18b618420a35490da94149ff8b580a2144f653f"},
"numbers": {:hex, :numbers, "5.2.4", "f123d5bb7f6acc366f8f445e10a32bd403c8469bdbce8ce049e1f0972b607080", [:mix], [{:coerce, "~> 1.0", [hex: :coerce, repo: "hexpm", optional: false]}, {:decimal, "~> 1.9 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "eeccf5c61d5f4922198395bf87a465b6f980b8b862dd22d28198c5e6fab38582"},
"nx": {:hex, :nx, "0.7.1", "5f6376e3d18408116e8a84b8f4ac851fb07dfe61764a5410ebf0b5dcb69c1b7e", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "e3ddd6a3f2a9bac79c67b3933368c25bb5ec814a883fc68aba8fd8a236751777"},
"nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"},
"nx_signal": {:hex, :nx_signal, "0.2.0", "e1ca0318877b17c81ce8906329f5125f1e2361e4c4235a5baac8a95ee88ea98e", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "7247e5e18a177a59c4cb5355952900c62fdeadeb2bad02a9a34237b68744e2bb"},
"oban": {:hex, :oban, "2.17.3", "ddfd5710aadcd550d2e174c8d73ce5f1865601418cf54a91775f20443fb832b7", [:mix], [{:ecto_sql, "~> 3.6", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:ecto_sqlite3, "~> 0.9", [hex: :ecto_sqlite3, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16", [hex: :postgrex, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "452eada8bfe0d0fefd0740ab5fa8cf3ef6c375df0b4a3c3805d179022a04738a"},
"parse_trans": {:hex, :parse_trans, "3.4.1", "6e6aa8167cb44cc8f39441d05193be6e6f4e7c2946cb2759f015f8c56b76e5ff", [:rebar3], [], "hexpm", "620a406ce75dada827b82e453c19cf06776be266f5a67cff34e1ef2cbb60e49a"},
"phoenix": {:hex, :phoenix, "1.7.11", "1d88fc6b05ab0c735b250932c4e6e33bfa1c186f76dcf623d8dd52f07d6379c7", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "b1ec57f2e40316b306708fe59b92a16b9f6f4bf50ccfa41aa8c7feb79e0ec02a"},
@ -90,12 +101,17 @@
"plug": {:hex, :plug, "1.15.3", "712976f504418f6dff0a3e554c40d705a9bcf89a7ccef92fc6a5ef8f16a30a97", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "cc4365a3c010a56af402e0809208873d113e9c38c401cabd88027ef4f5c01fd2"},
"plug_cowboy": {:hex, :plug_cowboy, "2.7.0", "3ae9369c60641084363b08fe90267cbdd316df57e3557ea522114b30b63256ea", [:mix], [{:cowboy, "~> 2.7.0 or ~> 2.8.0 or ~> 2.9.0 or ~> 2.10.0", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "d85444fb8aa1f2fc62eabe83bbe387d81510d773886774ebdcb429b3da3c1a4a"},
"plug_crypto": {:hex, :plug_crypto, "2.0.0", "77515cc10af06645abbfb5e6ad7a3e9714f805ae118fa1a70205f80d2d70fe73", [:mix], [], "hexpm", "53695bae57cc4e54566d993eb01074e4d894b65a3766f1c43e2c61a1b0f45ea9"},
"polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"},
"postgrex": {:hex, :postgrex, "0.16.5", "fcc4035cc90e23933c5d69a9cd686e329469446ef7abba2cf70f08e2c4b69810", [:mix], [{:connection, "~> 1.1", [hex: :connection, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "edead639dc6e882618c01d8fc891214c481ab9a3788dfe38dd5e37fd1d5fb2e8"},
"progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"},
"qex": {:hex, :qex, "0.5.1", "0d82c0f008551d24fffb99d97f8299afcb8ea9cf99582b770bd004ed5af63fd6", [:mix], [], "hexpm", "935a39fdaf2445834b95951456559e9dc2063d0a055742c558a99987b38d6bab"},
"rambo": {:hex, :rambo, "0.3.4", "8962ac3bd1a633ee9d0e8b44373c7913e3ce3d875b4151dcd060886092d2dce7", [:mix], [], "hexpm", "0cc54ed089fbbc84b65f4b8a774224ebfe60e5c80186fafc7910b3e379ad58f1"},
"ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"},
"ratio": {:hex, :ratio, "3.0.2", "60a5976872a4dc3d873ecc57eed1738589e99d1094834b9c935b118231297cfb", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}, {:numbers, "~> 5.2.0", [hex: :numbers, repo: "hexpm", optional: false]}], "hexpm", "3a13ed5a30ad0bfd7e4a86bf86d93d2b5a06f5904417d38d3f3ea6406cdfc7bb"},
"replicate": {:hex, :replicate, "1.2.0", "802d6826a89a11aded0d3586d6d0418db3c20590f5cc04727b9951e0efe566e9", [:mix], [{:httpoison, "~> 2.0", [hex: :httpoison, repo: "hexpm", optional: false]}, {:jason, "~> 1.2", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "6eeeef65de231784937b0f4c0336fe247caf5845930d2ee667d2b3ed5cec888d"},
"req": {:hex, :req, "0.4.8", "2b754a3925ddbf4ad78c56f30208ced6aefe111a7ea07fb56c23dccc13eb87ae", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.9", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 1.6 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "7146e51d52593bb7f20d00b5308a5d7d17d663d6e85cd071452b613a8277100c"},
"rustler_precompiled": {:hex, :rustler_precompiled, "0.7.1", "ecadf02cc59a0eccbaed6c1937303a5827fbcf60010c541595e6d3747d3d0f9f", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "b9e4657b99a1483ea31502e1d58c464bedebe9028808eda45c3a429af4550c66"},
"safetensors": {:hex, :safetensors, "0.1.3", "7ff3c22391e213289c713898481d492c9c28a49ab1d0705b72630fb8360426b2", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "fe50b53ea59fde4e723dd1a2e31cfdc6013e69343afac84c6be86d6d7c562c14"},
"shmex": {:hex, :shmex, "0.5.0", "7dc4fb1a8bd851085a652605d690bdd070628717864b442f53d3447326bcd3e8", [:mix], [{:bunch_native, "~> 0.5.0", [hex: :bunch_native, repo: "hexpm", optional: false]}, {:bundlex, "~> 1.0", [hex: :bundlex, repo: "hexpm", optional: false]}], "hexpm", "b67bb1e22734758397c84458dbb746519e28eac210423c267c7248e59fc97bdc"},
"slugify": {:hex, :slugify, "1.3.1", "0d3b8b7e5c1eeaa960e44dce94382bee34a39b3ea239293e457a9c5b47cc6fd3", [:mix], [], "hexpm", "cb090bbeb056b312da3125e681d98933a360a70d327820e4b7f91645c4d8be76"},
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"},
@ -108,12 +124,16 @@
"telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"},
"thumbnex": {:hex, :thumbnex, "0.5.0", "9f3c20c8c70d17e108710830e1495548b45c7433f30dc318f1075d76eb6f7f00", [:mix], [{:ffmpex, "~> 0.10.0", [hex: :ffmpex, repo: "hexpm", optional: false]}, {:mogrify, "~> 0.9.0", [hex: :mogrify, repo: "hexpm", optional: false]}], "hexpm", "a187948110e2de8dc2e9a73d5a3489398ba6a44d285293c174b6285717c5e5fc"},
"timex": {:hex, :timex, "3.7.11", "bb95cb4eb1d06e27346325de506bcc6c30f9c6dea40d1ebe390b262fad1862d1", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "8b9024f7efbabaf9bd7aa04f65cf8dcd7c9818ca5737677c7b76acbc6a94d1aa"},
"tokenizers": {:hex, :tokenizers, "0.4.0", "140283ca74a971391ddbd83cd8cbdb9bd03736f37a1b6989b82d245a95e1eb97", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "ef1a9824f5a893cd3b831c0e5b3d72caa250d2ec462035cc6afef6933b13a82e"},
"turbojpeg": {:hex, :turbojpeg, "0.4.0", "02616e44a70788e40bfc1fdbbdd8bc4e4615cd7d5ced5614b2aefb60a8acbda7", [:mix], [{:bundlex, "~> 1.4.0", [hex: :bundlex, repo: "hexpm", optional: false]}, {:membrane_core, "~> 1.0", [hex: :membrane_core, repo: "hexpm", optional: false]}, {:membrane_raw_video_format, "~> 0.3.0", [hex: :membrane_raw_video_format, repo: "hexpm", optional: false]}, {:unifex, "~> 1.1.0", [hex: :unifex, repo: "hexpm", optional: false]}], "hexpm", "53759d41f6e7d63805dc014db11b5c8e9274c5e67caea46d8b7f314dcdf51431"},
"typed_struct": {:hex, :typed_struct, "0.3.0", "939789e3c1dca39d7170c87f729127469d1315dcf99fee8e152bb774b17e7ff7", [:mix], [], "hexpm", "c50bd5c3a61fe4e198a8504f939be3d3c85903b382bde4865579bc23111d1b6d"},
"tzdata": {:hex, :tzdata, "1.1.1", "20c8043476dfda8504952d00adac41c6eda23912278add38edc140ae0c5bcc46", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a69cec8352eafcd2e198dea28a34113b60fdc6cb57eb5ad65c10292a6ba89787"},
"unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"},
"unifex": {:hex, :unifex, "1.1.0", "26b1bcb6c3b3454e1ea15f85b2e570aaa5b5c609566aa9f5c2e0a8b213379d6b", [:mix], [{:bunch, "~> 1.0", [hex: :bunch, repo: "hexpm", optional: false]}, {:bundlex, "~> 1.0", [hex: :bundlex, repo: "hexpm", optional: false]}, {:shmex, "~> 0.5.0", [hex: :shmex, repo: "hexpm", optional: false]}], "hexpm", "d8f47e9e3240301f5b20eec5792d1d4341e1a3a268d94f7204703b48da4aaa06"},
"unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"},
"unzip": {:hex, :unzip, "0.10.0", "374e0059e48e982076f3fd22cd4817ab11016c1bae3f09421511901ddda95c5c", [:mix], [], "hexpm", "101c06b0fa97a858a83beb618f4bc20370624f73ab3954f756d9b52194056de6"},
"websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"},
"websock_adapter": {:hex, :websock_adapter, "0.5.5", "9dfeee8269b27e958a65b3e235b7e447769f66b5b5925385f5a569269164a210", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "4b977ba4a01918acbf77045ff88de7f6972c2a009213c515a445c48f224ffce9"},
"xla": {:hex, :xla, "0.6.0", "67bb7695efa4a23b06211dc212de6a72af1ad5a9e17325e05e0a87e4c241feb8", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "dd074daf942312c6da87c7ed61b62fb1a075bced157f1cc4d47af2d7c9f44fb7"},
"zarex": {:hex, :zarex, "1.0.5", "58239e3ee5d75f343262bb4df5cf466555a1c689f920e5d3651a9333972f7c7e", [:mix], [], "hexpm", "9fb72ef0567c2b2742f5119a1ba8a24a2fabb21b8d09820aefbf3e592fa9a46a"},
}

View File

@ -0,0 +1,21 @@
defmodule Algora.Repo.Local.Migrations.CreateSegments do
  use Ecto.Migration

  # Creates the `segments` table plus one index per foreign-key column.
  # Each segment carries its text, a start/end pair, an embedding stored in a
  # `:map` column, and required references to its first subtitle, its last
  # subtitle, and the video it belongs to.
  def change do
    create table(:segments) do
      add(:body, :text)
      add(:start, :float)
      add(:end, :float)
      add(:embedding, :map)

      # All three references are mandatory and use `on_delete: :nothing`.
      add(:starting_subtitle_id, references(:subtitles, on_delete: :nothing), null: false)
      add(:ending_subtitle_id, references(:subtitles, on_delete: :nothing), null: false)
      add(:video_id, references(:videos, on_delete: :nothing), null: false)

      timestamps()
    end

    # Single-column indexes, created in the same order as before.
    for column <- [:video_id, :starting_subtitle_id, :ending_subtitle_id] do
      create index(:segments, [column])
    end
  end
end

Binary file not shown.

After

Width:  |  Height:  |  Size: 547 KiB

295
scripts/cossgpt.livemd Normal file
View File

@ -0,0 +1,295 @@
# COSSgpt
```elixir
import Ecto.Query
import Ecto.Changeset
alias Algora.{Accounts, Library, Repo, Storage, Cache, ML}
# Inspect charlists as plain lists rather than as quoted charlists.
IEx.configure(inspect: [charlists: :as_lists])

# ExSync is optional: register the group leader only when the module can be
# loaded and actually exports `register_group_leader/0`.
if Code.ensure_loaded?(ExSync) and function_exported?(ExSync, :register_group_leader, 0) do
  ExSync.register_group_leader()
end
```
## Transcription and indexing pipeline
```elixir
defmodule COSSgpt do
  @moduledoc """
  Notebook helper that runs the audio-to-search-index pipeline for one video:
  upload the mp3, transcribe it into subtitles, chunk the transcript, create
  embeddings for the chunks, and persist them as segments that are also added
  to the vector index.

  Uses the `Library`, `ML`, `Cache`, `Storage` and `Repo` aliases and the
  `Ecto.Changeset` import from the notebook's setup cell.
  """

  alias Algora.Library.Video

  # Default location of the source mp3 files. Pass a different directory as the
  # third argument of `transcribe_video/3` when running on another machine.
  @dir "/home/zaf/Desktop/podcast audio"

  @doc """
  Runs the full pipeline for the video with the given `id`.

  ## Parameters

    * `id` - id passed to `Library.get_video!/1`.
    * `filename` - name of the mp3 file inside `dir`.
    * `dir` - directory containing `filename`; defaults to the module's `@dir`.

  Every expensive step is wrapped in `Cache.fetch/2` under a key derived from
  the video slug (presumably so a re-run skips steps that already completed;
  confirm against `Algora.Cache`).

  Returns the value produced by the `"<slug>/segments"` cache entry, i.e. the
  inserted segments.

  Raises if the video or the index cannot be loaded, or if any insert fails.
  """
  def transcribe_video(id, filename, dir \\ @dir) do
    video = Library.get_video!(id)
    index = ML.load_index!()

    IO.puts("\n⌛ processing #{video.title}")

    video = ensure_uuid(video)
    slug = Video.slug(video)

    IO.puts("- uploading mp3 to tigris")
    upload_audio(video, slug, "#{dir}/#{filename}")

    IO.puts("- transcribing audio")
    transcribe_audio(video, slug)

    IO.puts("- chunking transcript")
    chunks = ML.chunk(video)

    IO.puts("- creating embeddings")
    embeddings = Cache.fetch("#{slug}/embeddings", fn -> ML.create_embeddings(chunks) end)

    IO.puts("- creating segments")
    create_segments(index, slug, chunks, embeddings)
  end

  # Gives the video a uuid (and persists it) when it does not have one yet.
  defp ensure_uuid(%{uuid: nil} = video) do
    video |> change() |> Video.put_video_uuid() |> Repo.update!()
  end

  defp ensure_uuid(video), do: video

  # Uploads the local mp3 at `path` to "<uuid>/index.mp3" in storage.
  # The upload's return value is ignored; the cached value is always `:ok`.
  defp upload_audio(video, slug, path) do
    Cache.fetch("#{slug}/upload", fn ->
      Storage.upload_from_filename(path, "#{video.uuid}/index.mp3")
      :ok
    end)
  end

  # Transcribes the uploaded audio and stores one subtitle per returned chunk.
  # All subtitle structs are built before the first insert, so a malformed
  # chunk aborts the step without writing anything.
  # NOTE(review): the inserts are not wrapped in a transaction; a failure midway
  # leaves earlier subtitles in place - confirm whether a retry can duplicate them.
  defp transcribe_audio(video, slug) do
    Cache.fetch("#{slug}/transcription", fn ->
      transcription = ML.transcribe_video("#{video.url_root}/index.mp3")

      :ok =
        transcription["chunks"]
        |> Enum.map(fn %{"text" => text, "timestamp" => [tstart, tend]} ->
          %Library.Subtitle{
            body: text,
            start: :erlang.float(tstart),
            # A missing end timestamp falls back to the video duration.
            end: :erlang.float(tend || video.duration),
            video_id: video.id
          }
        end)
        |> Enum.each(&Repo.insert!/1)

      transcription
    end)
  end

  # Pairs each chunk with its embedding, inserts the resulting segments and
  # adds them to the vector index. Returns the inserted segments.
  defp create_segments(index, slug, chunks, embeddings) do
    Cache.fetch("#{slug}/segments", fn ->
      segments =
        chunks
        |> Enum.zip(embeddings)
        |> Enum.map(fn {chunk, embedding} ->
          %Library.Segment{chunk | embedding: embedding["embedding"]}
        end)
        |> Enum.map(&Repo.insert!/1)

      ML.add_embeddings(index, segments)
      segments
    end)
  end
end
```
```elixir
# {video id, mp3 filename} pairs. Each pair is handed to
# COSSgpt.transcribe_video/2 by the loop that follows this list.
videos = [
{6333, "Supertokens Interview Highlights.mp3"},
{6339, "Hanko full interview.mp3"},
{6407, "OpenBB Full Interview.mp3"},
{6422, "Signoz Interview Highlights.mp3"},
{6390, "Remotion Interview Highlights.mp3"},
{6305, "Nango Interview Highlights.mp3"},
{6347, "Linen Interview Highlights.mp3"},
{6429, "Medplum Full Interview.mp3"},
{6393, "Windmill Interview Highlights.mp3"},
{6419, "Elementary Data Highlights.mp3"},
{6400, "Sematic_Highlights_final.mp3"},
{6300, "Infisical Interview Highlights.mp3"},
{6421, "Novu Interview Highlights.mp3"},
{6330, "Hydra Full Interview.mp3"},
{6309, "shuttle.rs Interview Highlights.mp3"},
{6402, "Ivy Interview Highlights.mp3"},
{6411, "Nextcloud Interview Highlights.mp3"},
{6425, "Okteto Interview Highlights .mp3"},
{6405, "Tigris Data Interview Highlights.mp3"},
{6415, "Refine Interview Highlights.mp3"},
{6413, "Cal.com Interview Highlights.mp3"},
{6302, "Hoppscotch Interview Highlights.mp3"},
{6334, "MAIN-Documenso.mp3"},
{6331, "GiteaFull.mp3"},
{6336, "maybefull.mp3"},
{6391, "NuxtFull.mp3"},
{6327, "OramaFull.mp3"},
{6430, "FinalTimeplusFullInterview.mp3"},
{6295, "tursofull.mp3"},
{6324, "unkeyfull.mp3"},
{8540, "Tauri-Full-Final.mp3"},
{8541, "TailcallFull-correct.mp3"},
{8539, "millionfull.mp3"},
{6426, "Scalarfullinterview.mp3"},
{6387, "IHP Interview Highlights.mp3"},
{8196, "peer-1.mp3"},
{8236, "peer-2.mp3"},
{8412, "peer-3.mp3"},
{8428, "peer-4.mp3"},
{8231, "andreas-1.mp3"},
{8411, "andreas-2.mp3"},
{8426, "andreas-3.mp3"}
]
# Run the pipeline for every {id, filename} pair in list order; the cell's
# value is the list of per-video results.
Enum.map(videos, fn {video_id, mp3_name} ->
  COSSgpt.transcribe_video(video_id, mp3_name)
end)
```
```elixir
# Ids that the code following this list marks as deleted in the HNSW index.
obsolete_segments = [
937,
938,
939,
940,
941,
942,
932,
933,
934,
935,
936,
1572,
1574,
1575,
1573,
1275,
1276,
1277,
1278,
1279,
1280,
1281,
1282,
1283,
1284,
1289,
1294,
1299,
1285,
1290,
1295,
1300,
1286,
1291,
1296,
1301,
1306,
1311,
1316,
1321,
1326,
1331,
1336,
1341,
1346,
1287,
1292,
1297,
1288,
1293,
1298,
1302,
1303,
1304,
1305,
1307,
1308,
1309,
1310,
1312,
1313,
1314,
1320,
1319,
1324,
1329,
1334,
1339,
1344,
1349,
1354,
1359,
1364,
1369,
1315,
1325,
1330,
1335,
1340,
1345,
1350,
1355,
1360,
1365,
1370,
1382,
1317,
1322,
1327,
1332,
1337,
1342,
1347,
1352,
1357,
1362,
1367,
1372,
1377,
1318,
1323,
1328,
1333,
1338,
1343,
1348,
1353,
1358,
1363,
1368,
1373,
1351,
1356,
1361,
1366,
1371,
1376,
1381,
1386,
1391,
1374,
1379,
1384,
1389,
1375,
1380,
1385,
1390,
1378,
1383,
1388,
1387
]
# Load the index, mark every obsolete segment id as deleted (crashing on the
# first call that does not return :ok), then save the index.
index = ML.load_index!()

Enum.each(obsolete_segments, fn segment_id ->
  :ok = HNSWLib.Index.mark_deleted(index, segment_id)
end)

ML.save_index(index)
```