From 2ab6ace872e7409a59511bc34109e19f78445938 Mon Sep 17 00:00:00 2001
From: MaxMax2016 <525942103@qq.com>
Date: Sun, 28 May 2023 22:30:58 +0800
Subject: [PATCH] create singer

---
 README.md        | 28 ++++++++++++++++++++++++----
 svc_eva.py       | 20 ++++++++++++++++++++
 svc_inference.py |  3 ++-
 3 files changed, 46 insertions(+), 5 deletions(-)
 create mode 100644 svc_eva.py

diff --git a/README.md b/README.md
index 16075514..ab93cef4 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,8 @@
 
 - 【无 泄漏】支持多发音人
 
+- 【捏 音色】创造独有发音人
+
 - 【带 伴奏】也能进行转换，轻度伴奏
 
 - 【用 Excel】进行原始调教，纯手工
@@ -29,9 +31,9 @@
 
 本项目将继续完成基于BIGVGAN的模型（32K），在此之后，有成果再更新项目
 
-## 模型和日志：https://github.com/PlayVoice/so-vits-svc-5.0/releases/tag/v5.3
+## 模型和日志：https://github.com/PlayVoice/so-vits-svc-5.0/releases/tag/base_release_hifigan
 
-- [5.0.epoch1200.full.pth](https://github.com/PlayVoice/so-vits-svc-5.0/releases/download/v5.3/5.0.epoch1200.full.pth)模型包括：生成器+判别器=176M，可用作预训练模型
+- [5.0.epoch1200.full.pth](https://github.com/PlayVoice/so-vits-svc-5.0/releases/download/base_release_hifigan/5.0.epoch1200.full.pth)模型包括：生成器+判别器=176M，可用作预训练模型
 - 发音人（56个）文件在configs/singers目录中，可进行推理测试，尤其测试音色泄露
 - 发音人22，30，47，51辨识度较高，音频样本在configs/singers_sample目录中
 
@@ -42,7 +44,7 @@
 | natural speech | Microsoft | ✅ | 减少发音错误 | - |
 | neural source-filter | NII | ✅ | 解决断音问题 | 参数优化 |
 | speaker encoder | Google | ✅ | 音色编码与聚类 | - |
-| GRL for speaker | Ubisoft |✅ | 防止编码器泄露音色 | 原理类似判别器的对抗训练 |
+| GRL for speaker | Ubisoft |✅ | 防止编码器泄漏音色 | 原理类似判别器的对抗训练 |
 | one shot vits |  Samsung | ✅ | VITS 一句话克隆 | - |
 | SCLN |  Microsoft | ✅ | 改善克隆 | - |
 | band extention | Adobe | ✅ | 16K升48K采样 | 数据处理 |
@@ -60,7 +62,7 @@
 💗必要的前处理：
 - 1 降噪&去伴奏
 - 2 频率提升
-- 3 音质提升，基于https://github.com/openvpi/vocoders ，待整合
+- 3 音质提升
 - 4 将音频剪裁为小于30秒的音频段，whisper的要求
 
 然后以下面文件结构将数据集放入dataset_raw目录
@@ -255,6 +257,24 @@ data_svc/
     | ---  | --- | --- | --- | --- | --- | --- | --- |
     | name | 配置文件 | 模型文件 | 音色文件 | 音频文件 | 音频内容 | 音高内容 | 升降调 |
 
+## 捏音色
+纯属巧合的取名：average -> ave -> eva，夏娃代表着孕育和繁衍
+
+> python svc_eva.py
+
+```python
+eva_conf = {
+    './configs/singers/singer0022.npy': 0,
+    './configs/singers/singer0030.npy': 0,
+    './configs/singers/singer0047.npy': 0.5,
+    './configs/singers/singer0051.npy': 0.5,
+}
+```
+
+生成的音色文件为：eva.spk.npy
+
+💗Flow和Decoder均需要输入，您甚至可以给两个模块输入不同的音色参数，捏出更独特的音色。
+
 ## 数据集
 
 | Name | URL |
diff --git a/svc_eva.py b/svc_eva.py
new file mode 100644
index 00000000..905d34e7
--- /dev/null
+++ b/svc_eva.py
@@ -0,0 +1,20 @@
+import os
+import numpy as np
+
+# Blend several speaker files into one by weighted sum (name: average -> ave -> eva).
+
+eva_conf = {  # maps speaker .npy path -> weight; zero-weight entries are still loaded
+    './configs/singers/singer0022.npy': 0,
+    './configs/singers/singer0030.npy': 0,
+    './configs/singers/singer0047.npy': 0.5,
+    './configs/singers/singer0051.npy': 0.5,
+}
+
+if __name__ == "__main__":
+
+    eva = np.zeros(256)  # accumulator; assumes each loaded array broadcasts with (256,) - TODO confirm
+    for k, v in eva_conf.items():  # k: file path, v: weight for that file
+        assert os.path.isfile(k), k  # NOTE(review): this check is skipped under `python -O`
+        spk = np.load(k)
+        eva = eva + spk * v  # weights are applied as given; nothing here normalises them
+    np.save("eva.spk.npy", eva, allow_pickle=False)  # relative path: written to the current directory
diff --git a/svc_inference.py b/svc_inference.py
index 223e9fcb..ce952296 100644
--- a/svc_inference.py
+++ b/svc_inference.py
@@ -56,8 +56,9 @@ def main(args):
     ppg = torch.FloatTensor(ppg)
 
     pit = load_csv_pitch(args.pit)
+    print("pitch shift: ", args.shift)
     if (args.shift == 0):
-        print("don't use pitch shift")
+        pass
     else:
         pit = np.array(pit)
         source = pit[pit > 0]