diff --git a/app/ipc/ipc_main.ts b/app/ipc/ipc_main.ts
index a1d3c7e5..2abec826 100644
--- a/app/ipc/ipc_main.ts
+++ b/app/ipc/ipc_main.ts
@@ -6,7 +6,7 @@ import { setMenuBar, showMenuBar, showContextMenu } from '../main_process/menu';
import { sendAll } from '../main_process/windowList';
import { serverInfo } from '../main_process/server';
import { ServerInfo } from '../main_process/types';
-import { logLine, logFilePath, LogLevel, LogSource } from '../src/util/log';
+import { logFilePath } from '../src/util/log';
ipcMain.handle('open-file', (event, options) => {
const win = BrowserWindow.fromWebContents(event.sender);
@@ -104,8 +104,3 @@ export function exportDebugLog(window: BrowserWindow): void {
ipcMain.handle('get-home-path', () => {
return app.getPath('home');
});
-
-ipcMain.handle('log-line', (_event, source: LogSource, level: LogLevel, ...args: any[]) => {
- assertSome(logLine);
- logLine(source, level, ...args.map((x) => JSON.stringify(x)));
-});
diff --git a/app/main_process/index.ts b/app/main_process/index.ts
index 601d4fd1..1a578553 100644
--- a/app/main_process/index.ts
+++ b/app/main_process/index.ts
@@ -24,6 +24,14 @@ export const createWindow = (): void => {
show: false,
});
+ let dontSendLog = false;
+ window.webContents.on('console-message', (_e, level, message) => {
+ if (message == 'server stderr') dontSendLog = true;
+ logLine && !dontSendLog && logLine(LogSource.RendererProcess, NumericLogLevels[level], message);
+
+    if (message == 'console.groupEnd') dontSendLog = false; // NOTE(review): confirm the renderer really emits this literal message — console.groupEnd() normally yields an empty message, in which case suppression never resets
+ });
+
window.webContents.on('new-window', (event, url, frameName, disposition, options) => {
if (frameName === 'modal') {
event.preventDefault();
@@ -151,4 +159,4 @@ import './server';
import { windowList } from './windowList';
import { applyMenuBar, setMenuBar } from './menu';
import { isRunningInTest } from '../src/util';
-import { initMainProcessLog } from '../src/util/log';
+import { LogSource, NumericLogLevels, initMainProcessLog, logLine } from '../src/util/log';
diff --git a/app/main_process/server.ts b/app/main_process/server.ts
index 85b53d45..9137da2e 100644
--- a/app/main_process/server.ts
+++ b/app/main_process/server.ts
@@ -6,6 +6,7 @@ import { app, dialog } from 'electron';
import { publishServerInfo, publishServerStderr } from '../ipc/ipc_main';
import { ServerInfo } from './types';
import { isRunningInTest } from '../src/util';
+import { LogLevel, LogSource, logLine } from '../src/util/log';
function findServer() {
const possibilities = [
@@ -74,7 +75,7 @@ function startServer() {
return;
}
serverProcess.stdout.on('data', (data: Buffer) => {
- console.log('server-stdout', data.toString());
+    logLine && logLine(LogSource.ServerProcess, LogLevel.Log, data.toString());
try {
const parsed_data: ServerStartingMessage | ServerStartedMessage = JSON.parse(data.toString());
if (parsed_data.msg == 'server_starting') {
@@ -88,7 +89,7 @@ function startServer() {
});
serverProcess.stderr.on('data', (data: Buffer) => {
- console.log(`server-stderr: \n${data}`);
+    logLine && logLine(LogSource.ServerProcess, LogLevel.Error, data.toString());
publishServerStderr(data.toString());
});
diff --git a/app/scripts/dev.js b/app/scripts/dev.js
index 36eee4d9..2e3f5e79 100644
--- a/app/scripts/dev.js
+++ b/app/scripts/dev.js
@@ -1,14 +1,13 @@
-const { createServer, build, createLogger } = require('vite');
+const { createServer, build } = require('vite');
const electronPath = require('electron');
const { spawn } = require('child_process');
const mode = (process.env.MODE = process.env.MODE || 'development');
-const LOG_LEVEL = 'warn';
const sharedConfig = {
mode,
build: {
watch: {},
},
- logLevel: LOG_LEVEL,
+ logLevel: 'warn',
};
const getWatcher = ({ name, configFile, writeBundle }) => {
@@ -29,10 +28,6 @@ const setupMainPackageWatcher = (viteDevServer) => {
process.env.VITE_DEV_SERVER_URL = `${protocol}//${host}:${port}${path}`;
}
- const logger = createLogger(LOG_LEVEL, {
- prefix: '[main]',
- });
-
let spawnProcess = null;
return getWatcher({
@@ -44,18 +39,10 @@ const setupMainPackageWatcher = (viteDevServer) => {
spawnProcess = null;
}
- spawnProcess = spawn(String(electronPath), [
- `${dir}/start.cjs.js`,
- `--remote-debugging-port=${process.env.DEBUGGER_PORT}`,
- ]);
-
- spawnProcess.stdout.on(
- 'data',
- (d) => d.toString().trim() && logger.warn(d.toString(), { timestamp: true })
- );
- spawnProcess.stderr.on(
- 'data',
- (d) => d.toString().trim() && logger.error(d.toString(), { timestamp: true })
+ spawnProcess = spawn(
+ String(electronPath),
+ [`${dir}/start.cjs.js`, `--remote-debugging-port=${process.env.DEBUGGER_PORT}`],
+ { stdio: 'inherit' }
);
},
});
diff --git a/app/src/index.tsx b/app/src/index.tsx
index e8c20ad5..2da3faf1 100644
--- a/app/src/index.tsx
+++ b/app/src/index.tsx
@@ -4,11 +4,9 @@ import * as ReactDOM from 'react-dom';
import './index.css';
import App from './components/App';
-import { exportDebugLogsToDisk, initRendererLog } from './util/log';
+import { exportDebugLogsToDisk } from './util/log';
import { subscribeExportDebugLog } from '../ipc/ipc_renderer';
-initRendererLog();
-
subscribeExportDebugLog((event, mainProcessLogPath) => exportDebugLogsToDisk(mainProcessLogPath));
const anyModule = module as any;
diff --git a/app/src/pages/LanguageSettings.tsx b/app/src/pages/LanguageSettings.tsx
index 88839b9c..0a491d5c 100644
--- a/app/src/pages/LanguageSettings.tsx
+++ b/app/src/pages/LanguageSettings.tsx
@@ -103,21 +103,23 @@ function ModelTable({
isDefault={null}
action={
- dispatch(cancelDownload(model.task_uuid))}
- hoverChild={}
- defaultChild={
-
- }
- />
+
+ dispatch(cancelDownload(model.task_uuid))}
+ hoverChild={}
+ defaultChild={
+
+ }
+ />
+
}
key={model.model_id}
@@ -221,19 +223,19 @@ function HoverSwitcher({
hoverChild: JSX.Element;
defaultChild: JSX.Element;
}): JSX.Element {
- const [shownChild, setShownChild] = useState(defaultChild);
+ const [hover, setHover] = useState(false);
return (
{
- setShownChild(defaultChild);
+ setHover(false);
}}
onMouseOver={() => {
- setShownChild(hoverChild);
+ setHover(true);
}}
>
- {shownChild}
+ {hover ? hoverChild : defaultChild}
);
}
@@ -262,6 +264,16 @@ export function LanguageSettingsPage(): JSX.Element {
+
+ Whisper Models for {language.lang}
+
+
+
Transcription Models for {language.lang}
diff --git a/app/src/pages/ModelManager.tsx b/app/src/pages/ModelManager.tsx
index db6410e2..08f278b8 100644
--- a/app/src/pages/ModelManager.tsx
+++ b/app/src/pages/ModelManager.tsx
@@ -41,6 +41,7 @@ export function ModelManagerPage(): JSX.Element {
Language
Transcription Models
+ Whisper Models
@@ -61,6 +62,11 @@ export function ModelManagerPage(): JSX.Element {
lang={lang.lang}
downloaded={downloaded}
/>
+
diff --git a/app/src/state/models.ts b/app/src/state/models.ts
index 672f1e13..db36bf6b 100644
--- a/app/src/state/models.ts
+++ b/app/src/state/models.ts
@@ -25,6 +25,7 @@ export interface Model {
export interface Language {
lang: string;
transcription_models: Model[];
+ whisper_models: Model[];
}
export type DownloadingModel = Model & {
diff --git a/app/src/util/log.ts b/app/src/util/log.ts
index d243528a..21459ef3 100644
--- a/app/src/util/log.ts
+++ b/app/src/util/log.ts
@@ -1,29 +1,41 @@
import fs, { createWriteStream } from 'fs';
import path from 'path';
import JSZip from 'jszip';
-import { getHomePath, saveFile, sendLogLine } from '../../ipc/ipc_renderer';
+import { getHomePath, saveFile } from '../../ipc/ipc_renderer';
import { isRunningInTest } from './index';
import glob from 'glob';
import { app } from 'electron';
export enum LogLevel {
Log,
- Trace,
- Debug,
Info,
Warn,
Error,
- GroupCollapsed,
- GroupEnd,
}
+export const NumericLogLevels = [LogLevel.Log, LogLevel.Info, LogLevel.Warn, LogLevel.Error];
+
export enum LogSource {
MainProcess,
RendererProcess,
+ ServerProcess,
}
export let logFilePath: string | null = null;
-let oldLog: ((...args: any[]) => void) | null = null;
+
+const buffer: string[] = [];
+function write(str: string) {
+ buffer.push(str);
+
+ const try_fn = () => {
+ if (process.stdout.writableLength == 0) {
+ process.stdout.write(buffer.shift() || '');
+ } else {
+ setTimeout(try_fn, 10);
+ }
+ };
+ try_fn();
+}
function log(file: number, source: LogSource, level: LogLevel, ...args: any[]) {
const date = new Date().toISOString();
@@ -36,7 +48,23 @@ function log(file: number, source: LogSource, level: LogLevel, ...args: any[]) {
level: level_str,
args: string_args,
});
- if (oldLog !== null) oldLog(log_line);
+
+ const FgGreen = '\x1b[32m';
+ const FgBlue = '\x1b[34m';
+ const FgYellow = '\x1b[33m';
+ const Reset = '\x1b[0m';
+ const source_color = [FgGreen, FgBlue, FgYellow][source];
+
+ write(
+ args
+ .join('\n')
+ .split('\n')
+ .map(
+ (line) =>
+ `${source_color}[${source_str.substring(0, 4)}]${Reset} ${level_str.padEnd(5)} | ${line}`
+ )
+ .join('\n') + '\n'
+ );
fs.writeSync(file, log_line + '\n');
fs.fsyncSync(file);
}
@@ -64,24 +92,15 @@ export function initMainProcessLog(): void {
logFilePath = path.join(log_dir, fileName);
const file = fs.openSync(logFilePath, 'w');
console.log('Init logging into', logFilePath);
- oldLog = console.log;
console.log = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args);
- console.trace = (...args) => log(file, LogSource.MainProcess, LogLevel.Trace, ...args);
- console.debug = (...args) => log(file, LogSource.MainProcess, LogLevel.Debug, ...args);
+ console.trace = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args);
+ console.debug = (...args) => log(file, LogSource.MainProcess, LogLevel.Log, ...args);
console.info = (...args) => log(file, LogSource.MainProcess, LogLevel.Info, ...args);
console.warn = (...args) => log(file, LogSource.MainProcess, LogLevel.Warn, ...args);
console.error = (...args) => log(file, LogSource.MainProcess, LogLevel.Error, ...args);
logLine = (...args) => log(file, ...args);
- const oldGroupCollapsed = console.groupCollapsed;
- console.groupCollapsed = (...args) => {
- log(file, LogSource.MainProcess, LogLevel.GroupCollapsed, ...args);
- oldGroupCollapsed(...args);
- };
- const oldGroupEnd = console.groupEnd;
- console.groupEnd = (...args) => {
- log(file, LogSource.MainProcess, LogLevel.GroupEnd, ...args);
- oldGroupEnd(...args);
- };
+ console.groupCollapsed = () => {};
+ console.groupEnd = () => {};
}
export async function exportDebugLogsToDisk(file: string): Promise {
@@ -108,25 +127,3 @@ export async function exportDebugLogsToDisk(file: string): Promise {
.on('error', reject);
});
}
-
-type KeyOfType = keyof {
- [P in keyof T as T[P] extends V ? P : never]: any;
-};
-
-function _mapLogFn(key: KeyOfType void>, level: LogLevel) {
- const _oldFn: (...args: any[]) => void = console[key];
- console[key] = (...args: any[]) => {
- _oldFn(...args);
- sendLogLine(level, ...args);
- };
-}
-export function initRendererLog(): void {
- _mapLogFn('log', LogLevel.Log);
- _mapLogFn('trace', LogLevel.Trace);
- _mapLogFn('debug', LogLevel.Debug);
- _mapLogFn('info', LogLevel.Info);
- _mapLogFn('warn', LogLevel.Warn);
- _mapLogFn('error', LogLevel.Error);
- _mapLogFn('groupCollapsed', LogLevel.GroupCollapsed);
- _mapLogFn('groupEnd', LogLevel.GroupEnd);
-}
diff --git a/server/app/models.py b/server/app/models.py
index 0f0bcbc6..139805b3 100644
--- a/server/app/models.py
+++ b/server/app/models.py
@@ -8,6 +8,7 @@
from urllib.parse import urlparse
from zipfile import ZipFile
+import huggingface_hub
import requests
import yaml
from vosk import Model
@@ -40,7 +41,7 @@ class ModelDescription:
size: str
type: str
lang: str
- compressed: bool = field(default=False)
+    download_type: str = field(default="http")
model_id: str = field(default=None)
def __post_init__(self):
@@ -58,9 +59,10 @@ def is_downloaded(self) -> bool:
class Language:
lang: str
transcription_models: List[ModelDescription] = field(default_factory=list)
+ whisper_models: List[ModelDescription] = field(default_factory=list)
def all_models(self):
- return self.transcription_models
+ return self.transcription_models + self.whisper_models
class ModelDefaultDict(defaultdict):
@@ -81,6 +83,8 @@ def __init__(self):
models[model_description.model_id] = model_description
if model["type"] == "transcription":
languages[lang].transcription_models.append(model_description)
+ elif model["type"] == "whisper":
+ languages[lang].whisper_models.append(model_description)
self.available = dict(languages)
self.model_descriptions = models
@@ -122,38 +126,61 @@ def get(self, model_id: str) -> Union[Model]:
def download(self, model_id: str, task_uuid: str):
task: DownloadModelTask = tasks.get(task_uuid)
model = self.get_model_description(model_id)
- with tempfile.TemporaryFile(dir=CACHE_DIR) as f:
- response = requests.get(model.url, stream=True)
- task.total = int(response.headers.get("content-length"))
- task.state = DownloadModelState.DOWNLOADING
-
- for data in response.iter_content(
- chunk_size=max(int(task.total / 1000), 1024 * 1024)
- ):
- task.add_progress(len(data))
-
- f.write(data)
- if task.canceled:
- return
-
- task.state = DownloadModelState.EXTRACTING
- if model.compressed:
- with ZipFile(f) as archive:
- target_dir = model.path()
- for info in archive.infolist():
- if info.is_dir():
- continue
- path = target_dir / Path("/".join(info.filename.split("/")[1:]))
- path.parent.mkdir(exist_ok=True, parents=True)
-
- source = archive.open(info.filename)
- target = open(path, "wb")
- with source, target:
- shutil.copyfileobj(source, target)
- else:
- f.seek(0)
- with open(model.path(), "wb") as target:
- shutil.copyfileobj(f, target)
+
+ if model.download_type.startswith("http"):
+ with tempfile.TemporaryFile(dir=CACHE_DIR) as f:
+ response = requests.get(model.url, stream=True)
+ task.total = int(response.headers.get("content-length"))
+ task.state = DownloadModelState.DOWNLOADING
+
+ for data in response.iter_content(
+ chunk_size=max(int(task.total / 1000), 1024 * 1024)
+ ):
+ task.add_progress(len(data))
+
+ f.write(data)
+ if task.canceled:
+ return
+
+ task.state = DownloadModelState.EXTRACTING
+ if model.download_type.endswith("+zip"):
+ with ZipFile(f) as archive:
+ target_dir = model.path()
+ for info in archive.infolist():
+ if info.is_dir():
+ continue
+ path = target_dir / Path(
+ "/".join(info.filename.split("/")[1:])
+ )
+ path.parent.mkdir(exist_ok=True, parents=True)
+
+ source = archive.open(info.filename)
+ target = open(path, "wb")
+ with source, target:
+ shutil.copyfileobj(source, target)
+ else:
+ f.seek(0)
+ with open(model.path(), "wb") as target:
+ shutil.copyfileobj(f, target)
+ elif model.download_type == "huggingface":
+ api = huggingface_hub.HfApi()
+ repo_info = api.repo_info(model.url, files_metadata=True)
+ task.total = sum(f.size for f in repo_info.siblings)
+ with tempfile.TemporaryDirectory(dir=CACHE_DIR) as dir:
+ for f in repo_info.siblings:
+ url = huggingface_hub.hf_hub_url(model.url, f.rfilename)
+                    with open(Path(dir) / f.rfilename, "wb") as file:  # FIXME(review): if rfilename is nested, parent dirs must be created first
+ task.state = DownloadModelState.DOWNLOADING
+ response = requests.get(url, stream=True)
+ for data in response.iter_content(
+ chunk_size=max(int(task.total / 1000), 1024 * 1024)
+ ):
+ task.add_progress(len(data))
+
+ file.write(data)
+ if task.canceled:
+ return
+            shutil.copytree(dir, model.path(), dirs_exist_ok=True)
task.state = DownloadModelState.DONE
diff --git a/server/app/models.yml b/server/app/models.yml
index 65659f2a..0560f6cd 100644
--- a/server/app/models.yml
+++ b/server/app/models.yml
@@ -1,253 +1,377 @@
# this file is autogenerated by the ../scripts/generate_models_list.py script.
# do not edit manually!
+Universal:
+- name: whisper-tiny
+ url: guillaumekln/faster-whisper-tiny
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 74M
+ type: whisper
+ download_type: huggingface
+- name: whisper-base
+ url: guillaumekln/faster-whisper-base
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 141M
+ type: whisper
+ download_type: huggingface
+- name: whisper-small
+ url: guillaumekln/faster-whisper-small
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 463M
+ type: whisper
+ download_type: huggingface
+- name: whisper-medium
+ url: guillaumekln/faster-whisper-medium
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 1459M
+ type: whisper
+ download_type: huggingface
+- name: whisper-large-v1
+ url: guillaumekln/faster-whisper-large-v1
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 2946M
+ type: whisper
+ download_type: huggingface
+- name: whisper-large-v2
+ url: guillaumekln/faster-whisper-large-v2
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 2946M
+ type: whisper
+ download_type: huggingface
English:
+- name: whisper-tiny.en
+ url: guillaumekln/faster-whisper-tiny.en
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 74M
+ type: whisper
+ download_type: huggingface
+- name: whisper-base.en
+ url: guillaumekln/faster-whisper-base.en
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 140M
+ type: whisper
+ download_type: huggingface
+- name: whisper-small.en
+ url: guillaumekln/faster-whisper-small.en
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 463M
+ type: whisper
+ download_type: huggingface
+- name: whisper-medium.en
+ url: guillaumekln/faster-whisper-medium.en
+ description: Whisper model doing both transcription and punctuation reconstruction
+ size: 1459M
+ type: whisper
+ download_type: huggingface
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
description: Lightweight wideband model for Android and RPi
size: 40M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.22.zip
description: Accurate generic US English model
size: 1.8G
type: transcription
- compressed: true
+ download_type: http+zip
- name: lgraph
url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.22-lgraph.zip
description: Big US English model with dynamic graph
size: 128M
type: transcription
- compressed: true
+ download_type: http+zip
+- name: big-2
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.42-gigaspeech.zip
+ description: Accurate generic US English model trained by Kaldi on Gigaspeech.
+ Mostly for podcasts, not for telephony
+ size: 2.3G
+ type: transcription
+ download_type: http+zip
+- name: big-3
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-daanzu-20200905.zip
+ description: Wideband model for dictation from Kaldi-active-grammar
+ project
+ size: 1.0G
+ type: transcription
+ download_type: http+zip
+- name: lgraph-2
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-daanzu-20200905-lgraph.zip
+ description: Wideband model for dictation from Kaldi-active-grammar
+ project with configurable graph
+ size: 129M
+ type: transcription
+ download_type: http+zip
+- name: big-4
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-librispeech-0.2.zip
+ description: Repackaged Librispeech model from Kaldi,
+ not very accurate
+ size: 845M
+ type: transcription
+ download_type: http+zip
+- name: small-2
+ url: https://alphacephei.com/vosk/models/vosk-model-small-en-us-zamia-0.5.zip
+ description: Repackaged Zamia model f_250, mainly for research
+ size: 49M
+ type: transcription
+ download_type: http+zip
+- name: big-5
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-aspire-0.2.zip
+ description: Kaldi original ASPIRE model, not very accurate
+ size: 1.4G
+ type: transcription
+ download_type: http+zip
+- name: big-6
+ url: https://alphacephei.com/vosk/models/vosk-model-en-us-0.21.zip
+ description: Wideband model previous generation
+ size: 1.6G
+ type: transcription
+ download_type: http+zip
Indian English:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-en-in-0.5.zip
description: Generic Indian English model for telecom and broadcast
size: 1G
type: transcription
- compressed: true
+ download_type: http+zip
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-en-in-0.4.zip
description: Lightweight Indian English model for mobile applications
size: 36M
type: transcription
- compressed: true
+ download_type: http+zip
Chinese:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-cn-0.22.zip
description: Lightweight model for Android and RPi
size: 42M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-cn-0.22.zip
description: Big generic Chinese model for server processing
size: 1.3G
type: transcription
- compressed: true
-Chinese Other:
-- name: big
+ download_type: http+zip
+- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-cn-kaldi-multicn-0.15.zip
description: Original Wideband Kaldi multi-cn model from Kaldi
with Vosk LM
size: 1.5G
type: transcription
- compressed: true
+ download_type: http+zip
Russian:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-ru-0.42.zip
description: Big mixed band Russian model for servers
size: 1.8G
type: transcription
- compressed: true
+ download_type: http+zip
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-ru-0.22.zip
description: Lightweight wideband model for Android/iOS and RPi
size: 45M
type: transcription
- compressed: true
-Russian Other:
-- name: big
+ download_type: http+zip
+- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-ru-0.22.zip
description: Big mixed band Russian model for servers
size: 1.5G
type: transcription
- compressed: true
-- name: big-2
+ download_type: http+zip
+- name: big-3
url: https://alphacephei.com/vosk/models/vosk-model-ru-0.10.zip
description: Big narrowband Russian model for servers
size: 2.5G
type: transcription
- compressed: true
+ download_type: http+zip
French:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-fr-0.22.zip
description: Lightweight wideband model for Android/iOS and RPi
size: 41M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-fr-0.22.zip
description: Big accurate model for servers
size: 1.4G
type: transcription
- compressed: true
-French Other:
-- name: small
+ download_type: http+zip
+- name: small-2
url: https://alphacephei.com/vosk/models/vosk-model-small-fr-pguyot-0.3.zip
description: Lightweight wideband model for Android and RPi trained by Paul
Guyot
size: 39M
type: transcription
- compressed: true
+ download_type: http+zip
- name: linto-2.2
url: https://alphacephei.com/vosk/models/vosk-model-fr-0.6-linto-2.2.0.zip
description: Model from LINTO
project
size: 1.5G
type: transcription
- compressed: true
+ download_type: http+zip
German:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-de-0.21.zip
description: Big German model for telephony and server
size: 1.9G
type: transcription
- compressed: true
+ download_type: http+zip
- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-de-tuda-0.6-900k.zip
description: Latest big wideband model from Tuda-DE
project
size: 4.4G
type: transcription
- compressed: true
+ download_type: http+zip
- name: small
+ url: https://alphacephei.com/vosk/models/vosk-model-small-de-zamia-0.3.zip
+ description: Zamia f_250 small model repackaged (not recommended)
+ size: 49M
+ type: transcription
+ download_type: http+zip
+- name: small-2
url: https://alphacephei.com/vosk/models/vosk-model-small-de-0.15.zip
description: Lightweight wideband model for Android and RPi
size: 45M
type: transcription
- compressed: true
+ download_type: http+zip
Spanish:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-es-0.42.zip
description: Lightweight wideband model for Android and RPi
size: 39M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-es-0.42.zip
description: Big model for Spanish
size: 1.4G
type: transcription
- compressed: true
+ download_type: http+zip
Portuguese/Brazilian Portuguese:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-pt-0.3.zip
description: Lightweight wideband model for Android and RPi
size: 31M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-pt-fb-v0.1.1-20220516_2113.zip
description: Big model from FalaBrazil
size: 1.6G
type: transcription
- compressed: true
+ download_type: http+zip
+Greek:
+- name: big
+ url: https://alphacephei.com/vosk/models/vosk-model-el-gr-0.7.zip
+ description: Big narrowband Greek model for server processing, not extremely accurate
+ though
+ size: 1.1G
+ type: transcription
+ download_type: http+zip
Turkish:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-tr-0.3.zip
description: Lightweight wideband model for Android and RPi
size: 35M
type: transcription
- compressed: true
+ download_type: http+zip
Vietnamese:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-vn-0.4.zip
description: Lightweight Vietnamese model
size: 32M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-vn-0.4.zip
description: Bigger Vietnamese model for server
size: 78M
type: transcription
- compressed: true
+ download_type: http+zip
Italian:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-it-0.22.zip
description: Lightweight model for Android and RPi
size: 48M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-it-0.22.zip
description: Big generic Italian model for servers
size: 1.2G
type: transcription
- compressed: true
+ download_type: http+zip
Dutch:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-nl-0.22.zip
description: Lightweight model for Dutch
size: 39M
type: transcription
- compressed: true
-Dutch Other:
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-nl-spraakherkenning-0.6.zip
description: Medium Dutch model from Kaldi_NL
size: 860M
type: transcription
- compressed: true
+ download_type: http+zip
- name: lgraph
url: https://alphacephei.com/vosk/models/vosk-model-nl-spraakherkenning-0.6-lgraph.zip
description: Smaller model with dynamic graph
size: 100M
type: transcription
- compressed: true
+ download_type: http+zip
Catalan:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-ca-0.4.zip
description: Lightweight wideband model for Android and RPi for Catalan
size: 42M
type: transcription
- compressed: true
+ download_type: http+zip
Arabic:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-ar-mgb2-0.4.zip
description: Repackaged Arabic model trained on MGB2 dataset from Kaldi
size: 318M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big-2
url: https://alphacephei.com/vosk/models/vosk-model-ar-0.22-linto-1.1.0.zip
description: Big model from LINTO
project
size: 1.3G
type: transcription
- compressed: true
+ download_type: http+zip
Farsi:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-fa-0.4.zip
description: Lightweight wideband model for Android and RPi for Farsi (Persian)
size: 47M
type: transcription
- compressed: true
+ download_type: http+zip
+- name: big
+ url: https://alphacephei.com/vosk/models/vosk-model-fa-0.5.zip
+ description: Model with large vocabulary, not yet accurate but better than before
+ (Persian)
+ size: 1G
+ type: transcription
+ download_type: http+zip
- name: small-2
url: https://alphacephei.com/vosk/models/vosk-model-small-fa-0.5.zip
description: Bigger small model for desktop application (Persian)
size: 60M
type: transcription
- compressed: true
+ download_type: http+zip
Filipino:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-tl-ph-generic-0.6.zip
description: Medium wideband model for Filipino (Tagalog) by feddybear
size: 320M
type: transcription
- compressed: true
+ download_type: http+zip
Ukrainian:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-nano.zip
@@ -255,41 +379,41 @@ Ukrainian:
Recognition for Ukrainian
size: 73M
type: transcription
- compressed: true
+ download_type: http+zip
- name: small-2
url: https://alphacephei.com/vosk/models/vosk-model-small-uk-v3-small.zip
description: Small model from Speech
Recognition for Ukrainian
size: 133M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-uk-v3.zip
description: Bigger model from Speech
Recognition for Ukrainian
size: 343M
type: transcription
- compressed: true
+ download_type: http+zip
- name: lgraph
url: https://alphacephei.com/vosk/models/vosk-model-uk-v3-lgraph.zip
description: Big dynamic model from Speech
Recognition for Ukrainian
size: 325M
type: transcription
- compressed: true
+ download_type: http+zip
Kazakh:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-kz-0.15.zip
description: Small mobile model from SAIDA_Kazakh
size: 42M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-kz-0.15.zip
description: Bigger wideband model SAIDA_Kazakh
size: 378M
type: transcription
- compressed: true
+ download_type: http+zip
Swedish:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-sv-rhasspy-0.15.zip
@@ -297,68 +421,68 @@ Swedish:
project
size: 289M
type: transcription
- compressed: true
+ download_type: http+zip
Japanese:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-ja-0.22.zip
description: Lightweight wideband model for Japanese
size: 48M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-ja-0.22.zip
description: Big model for Japanese
size: 1Gb
type: transcription
- compressed: true
+ download_type: http+zip
Esperanto:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-eo-0.42.zip
description: Lightweight model for Esperanto
size: 42M
type: transcription
- compressed: true
+ download_type: http+zip
Hindi:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-hi-0.22.zip
description: Lightweight model for Hindi
size: 42M
type: transcription
- compressed: true
+ download_type: http+zip
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-hi-0.22.zip
description: Big accurate model for servers
size: 1.5Gb
type: transcription
- compressed: true
+ download_type: http+zip
Czech:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-cs-0.4-rhasspy.zip
description: Lightweight model for Czech from Rhasspy project
size: 44M
type: transcription
- compressed: true
+ download_type: http+zip
Polish:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-pl-0.22.zip
description: Lightweight model for Polish
size: 50M
type: transcription
- compressed: true
+ download_type: http+zip
Uzbek:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-uz-0.22.zip
description: Lightweight model for Uzbek
size: 49M
type: transcription
- compressed: true
+ download_type: http+zip
Korean:
- name: small
url: https://alphacephei.com/vosk/models/vosk-model-small-ko-0.22.zip
description: Lightweight model for Korean
size: 82M
type: transcription
- compressed: true
+ download_type: http+zip
Breton:
- name: big
url: https://alphacephei.com/vosk/models/vosk-model-br-0.8.zip
@@ -366,4 +490,4 @@ Breton:
project
size: 70M
type: transcription
- compressed: true
+ download_type: http+zip
diff --git a/server/scripts/generate_models_list.py b/server/scripts/generate_models_list.py
index 2d3737ef..2092a5d6 100644
--- a/server/scripts/generate_models_list.py
+++ b/server/scripts/generate_models_list.py
@@ -1,12 +1,45 @@
from collections import defaultdict
from pathlib import Path
+import huggingface_hub
import requests
import yaml
from bs4 import BeautifulSoup
+WHISPER_MODELS = {
+ "tiny": "guillaumekln/faster-whisper-tiny",
+ "base": "guillaumekln/faster-whisper-base",
+ "small": "guillaumekln/faster-whisper-small",
+ "medium": "guillaumekln/faster-whisper-medium",
+ "large-v1": "guillaumekln/faster-whisper-large-v1",
+ "large-v2": "guillaumekln/faster-whisper-large-v2",
+ "tiny.en": "guillaumekln/faster-whisper-tiny.en",
+ "base.en": "guillaumekln/faster-whisper-base.en",
+ "small.en": "guillaumekln/faster-whisper-small.en",
+ "medium.en": "guillaumekln/faster-whisper-medium.en",
+}
+
HARDCODED_MODELS = []
+models = []
+
+api = huggingface_hub.HfApi()
+for name, url in WHISPER_MODELS.items():
+ repo_info = api.repo_info(url, files_metadata=True)
+ models.append(
+ {
+ "lang": "English" if name.endswith(".en") else "Universal",
+ "name": f"whisper-{name}",
+ "url": url,
+ "description": "Whisper model doing both transcription and punctuation reconstruction",
+ "size": f"{int(sum(f.size for f in repo_info.siblings) / 1024 / 1024)}M",
+ "type": "whisper",
+ "download_type": "huggingface",
+ },
+ )
+
+models.extend(HARDCODED_MODELS)
+
r = requests.get("https://alphacephei.com/vosk/models")
assert r.status_code == 200
soup = BeautifulSoup(r.content, "html.parser")
@@ -14,8 +47,6 @@
columns = [x.text for x in table.find_all("th")]
rows = table.find("tbody").find_all("tr")
-
-models = HARDCODED_MODELS
current_lang = None
for row in rows:
if strong := row.find("strong"):
@@ -26,8 +57,7 @@
), "no previous language heading found, probably the format changed :("
raw = {k: v for k, v in zip(columns, row.find_all("td"))}
- if current_lang == "English Other" or "not" in raw["Notes"].text.lower():
- continue
+ current_lang = current_lang.replace("Other", "").strip()
if current_lang == "Speaker identification model":
continue
@@ -46,7 +76,7 @@
description=raw["Notes"].decode_contents(),
size=raw["Size"].text,
type="transcription",
- compressed=True,
+ download_type="http+zip",
)
models += [model]