DeepLearning101 committed on
Commit
e34c54b
1 Parent(s): 0075f67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -23
app.py CHANGED
@@ -7,15 +7,6 @@ import torchaudio
7
  import numpy as np
8
  from denoiser.demucs import Demucs
9
  from pydub import AudioSegment
10
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
11
-
12
- # # 設置 Hugging Face Hub 的 Access Token
13
- # auth_token = os.getenv("HF_TOKEN")
14
-
15
- # # 加載私有模型
16
- # model_id = "DeepLearning101/Speech-Quality-Inspection_Meta-Denoiser"
17
- # model = AutoModelForSequenceClassification.from_pretrained(model_id, token=auth_token)
18
- # tokenizer = AutoTokenizer.from_pretrained(model_id, token=auth_token)
19
 
20
  modelpath = './denoiser/master64.th'
21
 
@@ -29,28 +20,17 @@ def transcribe(file_upload, microphone):
29
  out = demucs(x[None])[0]
30
  out = out / max(out.abs().max().item(), 1)
31
  torchaudio.save('enhanced.wav', out, sr)
32
- enhanced = AudioSegment.from_wav('enhanced.wav') #只有去完噪的需要降bitrate再做語音識別
33
  enhanced.export('enhanced.wav', format="wav", bitrate="256k")
34
  return "enhanced.wav"
35
 
36
- # # 假設模型是用於文本分類
37
- # inputs = tokenizer("enhanced.wav", return_tensors="pt")
38
- # outputs = model(**inputs)
39
- # predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
40
-
41
- # return "enhanced.wav", predictions
42
-
43
  demo = gr.Interface(
44
  fn=transcribe,
45
  inputs=[
46
- gr.Audio(source="microphone", type="filepath", optional=True, label="語音質檢麥克風實時錄音"),
47
- gr.Audio(source="upload", type="filepath", optional=True, label="語音質檢原始音檔"),
48
  ],
49
  outputs=gr.Audio(type="filepath", label="Output"),
50
- # outputs=[
51
- # gr.Audio(type="filepath", label="Output"),
52
- # gr.Textbox(label="Model Predictions")
53
- # ],
54
  title="<p style='text-align: center'><a href='https://www.twman.org/AI' target='_blank'>語音質檢噪音去除 (語音增強):Meta Denoiser</a>",
55
  description="為了提升語音識別的效果,可以在識別前先進行噪音去除",
56
  allow_flagging="never",
 
7
  import numpy as np
8
  from denoiser.demucs import Demucs
9
  from pydub import AudioSegment
 
 
 
 
 
 
 
 
 
10
 
11
  modelpath = './denoiser/master64.th'
12
 
 
20
  out = demucs(x[None])[0]
21
  out = out / max(out.abs().max().item(), 1)
22
  torchaudio.save('enhanced.wav', out, sr)
23
+ enhanced = AudioSegment.from_wav('enhanced.wav') # 只有去完噪的需要降 bitrate 再做語音識別
24
  enhanced.export('enhanced.wav', format="wav", bitrate="256k")
25
  return "enhanced.wav"
26
 
 
 
 
 
 
 
 
27
  demo = gr.Interface(
28
  fn=transcribe,
29
  inputs=[
30
+ gr.Audio(type="filepath", label="語音質檢麥克風實時錄音"),
31
+ gr.Audio(type="filepath", label="語音質檢原始音檔"),
32
  ],
33
  outputs=gr.Audio(type="filepath", label="Output"),
 
 
 
 
34
  title="<p style='text-align: center'><a href='https://www.twman.org/AI' target='_blank'>語音質檢噪音去除 (語音增強):Meta Denoiser</a>",
35
  description="為了提升語音識別的效果,可以在識別前先進行噪音去除",
36
  allow_flagging="never",