From 2ab6ace872e7409a59511bc34109e19f78445938 Mon Sep 17 00:00:00 2001 From: MaxMax2016 <525942103@qq.com> Date: Sun, 28 May 2023 22:30:58 +0800 Subject: [PATCH] create singer --- README.md | 28 ++++++++++++++++++++++++---- svc_eva.py | 20 ++++++++++++++++++++ svc_inference.py | 3 ++- 3 files changed, 46 insertions(+), 5 deletions(-) create mode 100644 svc_eva.py diff --git a/README.md b/README.md index 16075514..ab93cef4 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,8 @@ - 【无 泄漏】支持多发音人 +- 【捏 音色】创造独有发音人 + - 【带 伴奏】也能进行转换,轻度伴奏 - 【用 Excel】进行原始调教,纯手工 @@ -29,9 +31,9 @@ 本项目将继续完成基于BIGVGAN的模型(32K),在此之后,有成果再更新项目 -## 模型和日志:https://github.com/PlayVoice/so-vits-svc-5.0/releases/tag/v5.3 +## 模型和日志:https://github.com/PlayVoice/so-vits-svc-5.0/releases/tag/base_release_hifigan -- [5.0.epoch1200.full.pth](https://github.com/PlayVoice/so-vits-svc-5.0/releases/download/v5.3/5.0.epoch1200.full.pth)模型包括:生成器+判别器=176M,可用作预训练模型 +- [5.0.epoch1200.full.pth](https://github.com/PlayVoice/so-vits-svc-5.0/releases/download/base_release_hifigan/5.0.epoch1200.full.pth)模型包括:生成器+判别器=176M,可用作预训练模型 - 发音人(56个)文件在configs/singers目录中,可进行推理测试,尤其测试音色泄露 - 发音人22,30,47,51辨识度较高,音频样本在configs/singers_sample目录中 @@ -42,7 +44,7 @@ | natural speech | Microsoft | ✅ | 减少发音错误 | - | | neural source-filter | NII | ✅ | 解决断音问题 | 参数优化 | | speaker encoder | Google | ✅ | 音色编码与聚类 | - | -| GRL for speaker | Ubisoft |✅ | 防止编码器泄露音色 | 原理类似判别器的对抗训练 | +| GRL for speaker | Ubisoft |✅ | 防止编码器泄漏音色 | 原理类似判别器的对抗训练 | | one shot vits | Samsung | ✅ | VITS 一句话克隆 | - | | SCLN | Microsoft | ✅ | 改善克隆 | - | | band extention | Adobe | ✅ | 16K升48K采样 | 数据处理 | @@ -60,7 +62,7 @@ 💗必要的前处理: - 1 降噪&去伴奏 - 2 频率提升 -- 3 音质提升,基于https://github.com/openvpi/vocoders ,待整合 +- 3 音质提升 - 4 将音频剪裁为小于30秒的音频段,whisper的要求 然后以下面文件结构将数据集放入dataset_raw目录 @@ -255,6 +257,24 @@ data_svc/ | --- | --- | --- | --- | --- | --- | --- | --- | | name | 配置文件 | 模型文件 | 音色文件 | 音频文件 | 音频内容 | 音高内容 | 升降调 | +## 捏音色 +纯属巧合的取名:average -> ave -> eva,夏娃代表着孕育和繁衍 + +> python svc_eva.py + 
+```python +eva_conf = { + './configs/singers/singer0022.npy': 0, + './configs/singers/singer0030.npy': 0, + './configs/singers/singer0047.npy': 0.5, + './configs/singers/singer0051.npy': 0.5, +} +``` + +生成的音色文件为:eva.spk.npy + +💗Flow和Decoder均需要输入,您甚至可以给两个模块输入不同的音色参数,捏出更独特的音色。 + ## 数据集 | Name | URL | diff --git a/svc_eva.py b/svc_eva.py new file mode 100644 index 00000000..905d34e7 --- /dev/null +++ b/svc_eva.py @@ -0,0 +1,20 @@ +import os +import numpy as np + +# average -> ave -> eva :haha + +eva_conf = { + './configs/singers/singer0022.npy': 0, + './configs/singers/singer0030.npy': 0, + './configs/singers/singer0047.npy': 0.5, + './configs/singers/singer0051.npy': 0.5, +} + +if __name__ == "__main__": + + eva = np.zeros(256) + for k, v in eva_conf.items(): + assert os.path.isfile(k), k + spk = np.load(k) + eva = eva + spk * v + np.save("eva.spk.npy", eva, allow_pickle=False) diff --git a/svc_inference.py b/svc_inference.py index 223e9fcb..ce952296 100644 --- a/svc_inference.py +++ b/svc_inference.py @@ -56,8 +56,9 @@ def main(args): ppg = torch.FloatTensor(ppg) pit = load_csv_pitch(args.pit) + print("pitch shift: ", args.shift) if (args.shift == 0): - print("don't use pitch shift") + pass else: pit = np.array(pit) source = pit[pit > 0]