C#使用whisper.net实现语音识别(语音转文本)

C#使用whisper.net实现语音识别(语音转文本)

    正在检查是否收录...

目录

介绍

效果

输出信息 

项目

代码

下载 

介绍

github地址:https://github.com/sandrohanea/whisper.net

Whisper.net. Speech to text made simple using Whisper Models

模型下载地址:https://huggingface.co/sandrohanea/whisper.net/tree/main/classic

效果

输出信息 

whisper_init_from_file_no_state: loading model from 'ggml-small.bin'
whisper_model_load: loading model
whisper_model_load: n_vocab       = 51865
whisper_model_load: n_audio_ctx   = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head  = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx    = 448
whisper_model_load: n_text_state  = 768
whisper_model_load: n_text_head   = 12
whisper_model_load: n_text_layer  = 12
whisper_model_load: n_mels        = 80
whisper_model_load: ftype         = 1
whisper_model_load: qntvr         = 0
whisper_model_load: type          = 3
whisper_model_load: mem required  =  743.00 MB (+   16.00 MB per decoder)
whisper_model_load: adding 1608 extra tokens
whisper_model_load: model ctx     =  464.68 MB
whisper_model_load: model size    =  464.44 MB
whisper_init_state: kv self size  =   15.75 MB
whisper_init_state: kv cross size =   52.73 MB
00:00:00->00:00:20: 皇鶴楼,崔昊,西人已成皇鶴去,此地空于皇鶴楼,皇鶴一去不复返,白云千载空悠悠。
00:00:20->00:00:39: 青川莉莉汉阳树,方草七七英五周,日暮相关何处事,燕泊江上世人愁。

项目

代码

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using Whisper.net;
using static System.Net.Mime.MediaTypeNames;

namespace C_使用whisper.net实现语音转文本
{
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        string fileFilter = "*.wav|*.wav";
        string wavFileName = "";
        WhisperFactory whisperFactory;
        WhisperProcessor processor;
        private async void button2_Click(object sender, EventArgs e)
        {
            if (wavFileName == "")
            {
                return;
            }

            try
            {
                button2.Enabled = false;
                using var fileStream = File.OpenRead(wavFileName);
                await foreach (var result in processor.ProcessAsync(fileStream))
                {
                    Console.WriteLine($"{result.Start}->{result.End}: {result.Text}\r\n");
                    txtResult.Text += $"{result.Start}->{result.End}: {result.Text}\r\n";
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                button2.Enabled = true;
            }

        }

        private void Form1_Load(object sender, EventArgs e)
        {
            whisperFactory = WhisperFactory.FromPath("ggml-small.bin");

            processor = whisperFactory.CreateBuilder()
               .WithLanguage("zh")//.WithLanguage("auto")
               .Build();

            wavFileName = "085黄鹤楼.wav";
            txtFileName.Text = wavFileName;
        }

        private void button1_Click(object sender, EventArgs e)
        {
            OpenFileDialog ofd = new OpenFileDialog();
            ofd.Filter = fileFilter;
            if (ofd.ShowDialog() != DialogResult.OK) return;

            txtResult.Text = "";

            wavFileName = ofd.FileName;
            txtFileName.Text = wavFileName;
        }
    }
}

using System; using System.Collections.Generic; using System.ComponentModel; using System.Data; using System.Drawing; using System.IO; using System.Linq; using System.Text; using System.Threading.Tasks; using System.Windows.Forms; using Whisper.net; using static System.Net.Mime.MediaTypeNames; namespace C_使用whisper.net实现语音转文本 { public partial class Form1 : Form { public Form1() { InitializeComponent(); } string fileFilter = "*.wav|*.wav"; string wavFileName = ""; WhisperFactory whisperFactory; WhisperProcessor processor; private async void button2_Click(object sender, EventArgs e) { if (wavFileName == "") { return; } try { button2.Enabled = false; using var fileStream = File.OpenRead(wavFileName); await foreach (var result in processor.ProcessAsync(fileStream)) { Console.WriteLine($"{result.Start}->{result.End}: {result.Text}\r\n"); txtResult.Text += $"{result.Start}->{result.End}: {result.Text}\r\n"; } } catch (Exception ex) { MessageBox.Show(ex.Message); } finally { button2.Enabled = true; } } private void Form1_Load(object sender, EventArgs e) { whisperFactory = WhisperFactory.FromPath("ggml-small.bin"); processor = whisperFactory.CreateBuilder() .WithLanguage("zh")//.WithLanguage("auto") .Build(); wavFileName = "085黄鹤楼.wav"; txtFileName.Text = wavFileName; } private void button1_Click(object sender, EventArgs e) { OpenFileDialog ofd = new OpenFileDialog(); ofd.Filter = fileFilter; if (ofd.ShowDialog() != DialogResult.OK) return; txtResult.Text = ""; wavFileName = ofd.FileName; txtFileName.Text = wavFileName; } } } 

下载 

源码下载

whisperstemctoivaclicode语音转文本githubgitwindowserpcreatehuggingfacetokenurl
  • 本文作者:李琛
  • 本文链接: https://wapzz.net/post-1766.html
  • 版权声明:本博客所有文章除特别声明外,均默认采用 CC BY-NC-SA 4.0 许可协议。
本站部分内容来源于网络转载,仅供学习交流使用。如涉及版权问题,请及时联系我们,我们将第一时间处理。
文章很赞!支持一下吧 还没有人为TA充电
为TA充电
还没有人为TA充电
0
  • 支付宝打赏
    支付宝扫一扫
  • 微信打赏
    微信扫一扫
感谢支持
文章很赞!支持一下吧
关于作者
2.3W+
5
0
1
WAP站长官方

GPT-4+物理引擎加持扩散模型,生成视频逼真、连贯、合理

上一篇

Redis内存空间预估与内存优化策略:保障数据安全与性能的架构实践AIGC/AI绘画/chatGPT/SD/MJ

下一篇
  • 复制图片
按住ctrl可打开默认菜单