UI structure started. SemanticSimilarity implemented. User settings implemented.
This commit is contained in:
parent
68ddc43446
commit
6f94f60b15
|
|
@ -2,5 +2,6 @@
|
||||||
<project version="4">
|
<project version="4">
|
||||||
<component name="VcsDirectoryMappings">
|
<component name="VcsDirectoryMappings">
|
||||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
<mapping directory="$PROJECT_DIR$/src/main/resources/Semantic-model" vcs="Git" />
|
||||||
</component>
|
</component>
|
||||||
</project>
|
</project>
|
||||||
|
|
@ -16,20 +16,49 @@ repositories {
|
||||||
}
|
}
|
||||||
|
|
||||||
dependencies {
|
dependencies {
|
||||||
// Note, if you develop a library, you should use compose.desktop.common.
|
val djlVersion = "0.30.0"
|
||||||
// compose.desktop.currentOs should be used in launcher-sourceSet
|
val jnaVersion = "5.15.0"
|
||||||
// (in a separate module for demo project and in testMain).
|
val voyagerVersion = "1.1.0-beta02"
|
||||||
// With compose.desktop.common you will also lose @Preview functionality
|
|
||||||
|
// Core
|
||||||
implementation(compose.desktop.currentOs)
|
implementation(compose.desktop.currentOs)
|
||||||
|
|
||||||
|
// Material Design
|
||||||
|
implementation(compose.materialIconsExtended)
|
||||||
|
|
||||||
// Speech to Text
|
// Speech to Text
|
||||||
implementation("com.alphacephei:vosk:0.3.45")
|
implementation("com.alphacephei:vosk:0.3.45")
|
||||||
//Java Native Access
|
|
||||||
implementation("net.java.dev.jna:jna:5.15.0")
|
//Java Native Access
|
||||||
implementation("net.java.dev.jna:jna-platform:5.15.0")
|
implementation("net.java.dev.jna:jna:$jnaVersion")
|
||||||
|
implementation("net.java.dev.jna:jna-platform:$jnaVersion")
|
||||||
|
|
||||||
//Coroutines
|
//Coroutines
|
||||||
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.9.0")
|
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.9.0")
|
||||||
|
|
||||||
|
// Voyager
|
||||||
|
// Navigator
|
||||||
|
implementation("cafe.adriel.voyager:voyager-navigator:$voyagerVersion")
|
||||||
|
// Screen Model
|
||||||
|
implementation("cafe.adriel.voyager:voyager-screenmodel:$voyagerVersion")
|
||||||
|
// BottomSheetNavigator
|
||||||
|
implementation("cafe.adriel.voyager:voyager-bottom-sheet-navigator:$voyagerVersion")
|
||||||
|
// TabNavigator
|
||||||
|
implementation("cafe.adriel.voyager:voyager-tab-navigator:$voyagerVersion")
|
||||||
|
// Transitions
|
||||||
|
implementation("cafe.adriel.voyager:voyager-transitions:$voyagerVersion")
|
||||||
|
|
||||||
|
// Deep Java Library
|
||||||
|
implementation("ai.djl:api:$djlVersion")
|
||||||
|
implementation("ai.djl.huggingface:tokenizers:$djlVersion")
|
||||||
|
implementation("ai.djl.pytorch:pytorch-engine:$djlVersion")
|
||||||
|
implementation("ai.djl.pytorch:pytorch-jni:2.4.0-$djlVersion")
|
||||||
|
|
||||||
|
// Semantic Kernel
|
||||||
|
implementation("com.microsoft.semantic-kernel:semantickernel-api:1.3.0")
|
||||||
|
|
||||||
|
// SLF4J
|
||||||
|
implementation("org.slf4j:slf4j-api:2.0.16")
|
||||||
}
|
}
|
||||||
|
|
||||||
compose.desktop {
|
compose.desktop {
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,92 @@
|
||||||
import androidx.compose.desktop.ui.tooling.preview.Preview
|
import androidx.compose.foundation.layout.Row
|
||||||
import androidx.compose.material.Button
|
import androidx.compose.material.Button
|
||||||
import androidx.compose.material.MaterialTheme
|
import androidx.compose.material.Icon
|
||||||
import androidx.compose.material.Text
|
import androidx.compose.material.Scaffold
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.filled.ArrowBackIosNew
|
||||||
|
import androidx.compose.material.icons.filled.PlayArrow
|
||||||
|
import androidx.compose.material.icons.filled.Stop
|
||||||
import androidx.compose.runtime.Composable
|
import androidx.compose.runtime.Composable
|
||||||
import androidx.compose.runtime.getValue
|
import androidx.compose.runtime.getValue
|
||||||
import androidx.compose.runtime.mutableStateOf
|
import androidx.compose.runtime.mutableStateOf
|
||||||
import androidx.compose.runtime.remember
|
import androidx.compose.runtime.saveable.rememberSaveable
|
||||||
import androidx.compose.runtime.setValue
|
import androidx.compose.runtime.setValue
|
||||||
import androidx.compose.ui.window.Window
|
import androidx.compose.ui.window.Window
|
||||||
import androidx.compose.ui.window.application
|
import androidx.compose.ui.window.application
|
||||||
|
import cafe.adriel.voyager.navigator.CurrentScreen
|
||||||
|
import cafe.adriel.voyager.navigator.LocalNavigator
|
||||||
|
import cafe.adriel.voyager.navigator.Navigator
|
||||||
|
import cafe.adriel.voyager.navigator.currentOrThrow
|
||||||
import kotlinx.coroutines.DelicateCoroutinesApi
|
import kotlinx.coroutines.DelicateCoroutinesApi
|
||||||
import kotlinx.coroutines.GlobalScope
|
import processing.SemanticSimilarity
|
||||||
import kotlinx.coroutines.async
|
import screens.Home
|
||||||
import kotlinx.coroutines.runBlocking
|
import screens.Settings
|
||||||
|
|
||||||
@OptIn(DelicateCoroutinesApi::class)
|
fun main() = application {
|
||||||
@Composable
|
val testSentence = "This is a test sentence for semantic similarity"
|
||||||
@Preview
|
val testCommands = listOf("This is a test", "This sentence is completely different", "Testing semantic similarity between sentence", "An unrelated statement")
|
||||||
fun App() {
|
|
||||||
var text by remember { mutableStateOf("Hello, World!") }
|
val semanticTester = SemanticSimilarity()
|
||||||
|
semanticTester.compareSentenceToList(testSentence, testCommands)
|
||||||
|
|
||||||
|
Window(onCloseRequest = ::exitApplication) {
|
||||||
|
Navigator(Home()) { navigator ->
|
||||||
|
Scaffold(
|
||||||
|
topBar = { topBar(navigator.size) },
|
||||||
|
content = { CurrentScreen() },
|
||||||
|
bottomBar = { }
|
||||||
|
)
|
||||||
|
|
||||||
MaterialTheme {
|
|
||||||
Button(onClick = {
|
|
||||||
val mic = Microphone()
|
|
||||||
GlobalScope.async { mic.startCapture() }
|
|
||||||
}) {
|
|
||||||
Text(text)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun main() = application {
|
@Composable
|
||||||
Window(onCloseRequest = ::exitApplication) {
|
fun topBar(screenCount: Int) {
|
||||||
App()
|
Row {
|
||||||
|
if (screenCount > 1) {
|
||||||
|
backButton()
|
||||||
|
}
|
||||||
|
captureButton()
|
||||||
|
Settings.settingsButton()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Button showing a back arrow; clicking it pops the current screen off the
 * navigator obtained from [LocalNavigator].
 */
@Composable
fun backButton() {
    val nav = LocalNavigator.currentOrThrow

    Button(
        onClick = { nav.pop() },
        content = {
            Icon(imageVector = Icons.Filled.ArrowBackIosNew, contentDescription = "Go Back")
        },
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Capture duration handed to the microphone, read from the `capture_duration` user property.
 *
 * Falls back to 5 when the property is missing, is not an integer, or is not positive,
 * rather than throwing while this file's top-level properties are initialised.
 * 5 matches the `capture_duration=5` entry added alongside this code (presumably user.properties).
 */
val captureDuration: Int =
    Settings.userProperties.getProperty("capture_duration")
        ?.trim()
        ?.toIntOrNull()
        ?.takeIf { it > 0 }
        ?: 5
|
||||||
|
/**
 * Chooses the capture button icon: the play arrow while idle, the stop icon while capturing.
 *
 * Not marked `@Composable`: it emits no UI and reads no composition state, so it can be
 * called from composable and non-composable code alike.
 */
fun getCaptureIcon(capturing: Boolean) = if (capturing) Icons.Filled.Stop else Icons.Filled.PlayArrow
|
||||||
|
/**
 * Returns the verb describing what the capture button will do next:
 * "Start" while idle, "Stop" while capturing.
 *
 * Not marked `@Composable`: it is a pure string lookup and emits no UI.
 */
fun getCaptureDescription(capturing: Boolean): String = if (capturing) "Stop" else "Start"
|
||||||
|
/**
 * Toggles [mic]: stops it when it reports that it is capturing, otherwise starts a
 * capture using [captureDuration].
 *
 * Despite the name, this performs the action immediately; it does not return a handler.
 */
fun getCaptureOnClick(mic: Microphone) {
    if (mic.capturing) {
        mic.stopCapture()
    } else {
        mic.startCapture(captureDuration)
    }
}
|
||||||
|
|
||||||
|
/**
 * Button that starts or stops microphone capture and shows the matching icon.
 *
 * The [Microphone] is remembered across recompositions. Creating it directly in the
 * composable body would hand every recomposition a fresh, idle instance, so the second
 * click would start another capture instead of stopping the one already running.
 */
@OptIn(DelicateCoroutinesApi::class)
@Composable
fun captureButton() {
    // Fully qualified so this block does not depend on an extra import in the file.
    val mic = androidx.compose.runtime.remember { Microphone() }
    var isCapturing by rememberSaveable { mutableStateOf(mic.capturing) }

    Button(
        onClick = {
            getCaptureOnClick(mic)
            // Mirror the microphone's own flag rather than blindly toggling, so the icon
            // cannot drift away from the real capture state.
            isCapturing = mic.capturing
        }
    ) {
        Icon(
            imageVector = getCaptureIcon(isCapturing),
            contentDescription = getCaptureDescription(isCapturing) + " audio capture"
        )
    }
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,17 @@
|
||||||
import kotlinx.coroutines.coroutineScope
|
import kotlinx.coroutines.DelicateCoroutinesApi
|
||||||
|
import kotlinx.coroutines.GlobalScope
|
||||||
|
import kotlinx.coroutines.async
|
||||||
import javax.sound.sampled.AudioFormat
|
import javax.sound.sampled.AudioFormat
|
||||||
import javax.sound.sampled.AudioSystem
|
import javax.sound.sampled.AudioSystem
|
||||||
import javax.sound.sampled.DataLine
|
import javax.sound.sampled.DataLine
|
||||||
import javax.sound.sampled.TargetDataLine
|
import javax.sound.sampled.TargetDataLine
|
||||||
import kotlinx.coroutines.launch
|
import processing.STT
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Documentation
|
* TODO: Documentation
|
||||||
*/
|
*/
|
||||||
class Microphone {
|
class Microphone {
|
||||||
|
var capturing = false
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
@ -16,25 +19,27 @@ class Microphone {
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private val audioFormat = getFormat()
|
private val audioFormat = AudioFormat(16000.0f, 16, 1, true, false)
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
suspend fun startCapture() {
|
@OptIn(DelicateCoroutinesApi::class)
|
||||||
|
fun startCapture(captureDuration: Int) {
|
||||||
|
capturing = true
|
||||||
source.open(audioFormat, source.bufferSize)
|
source.open(audioFormat, source.bufferSize)
|
||||||
source.start()
|
source.start()
|
||||||
|
|
||||||
val buffer = ByteArray(160000)
|
val buffer = ByteArray(32000 * captureDuration) // sampleRate (16000) * bytes per sample (2) * channels (1) * duration
|
||||||
//TODO: Start loop in own coroutine
|
GlobalScope.async {
|
||||||
coroutineScope {
|
val stt = STT()
|
||||||
launch {
|
|
||||||
while (true) {
|
while (capturing) {
|
||||||
val audioBytes = source.read(buffer, 0, buffer.size)
|
val audioBytes = source.read(buffer, 0, buffer.size)
|
||||||
val stt = STT()
|
val result = stt.parseBuffer(buffer, audioBytes)
|
||||||
val result = stt.parseBuffer(buffer, audioBytes)
|
println("Captured Speech: $result")
|
||||||
println("Captured Speech: $result")
|
//TODO: Pass onto semantic similarity
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
stt.closeModel()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
|
|
@ -42,24 +47,8 @@ class Microphone {
|
||||||
*/
|
*/
|
||||||
fun stopCapture() {
|
fun stopCapture() {
|
||||||
source.stop()
|
source.stop()
|
||||||
}
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
fun closeSource() {
|
|
||||||
source.close()
|
source.close()
|
||||||
}
|
capturing = false
|
||||||
/**
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
private fun getFormat(): AudioFormat {
|
|
||||||
return AudioFormat(
|
|
||||||
16000.0f, // 16000 Required
|
|
||||||
16,
|
|
||||||
1,
|
|
||||||
true,
|
|
||||||
false
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,10 @@
|
||||||
|
package processing
|
||||||
|
|
||||||
import org.vosk.Model
|
import org.vosk.Model
|
||||||
import org.vosk.Recognizer
|
import org.vosk.Recognizer
|
||||||
import java.nio.file.Paths
|
import java.nio.file.Paths
|
||||||
|
import kotlin.io.path.absolutePathString
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO: Documentation
|
* TODO: Documentation
|
||||||
*/
|
*/
|
||||||
|
|
@ -9,10 +13,12 @@ class STT {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private val model = Model(getModelPath())
|
private val model = Model(getModelPath())
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private val recognizer = Recognizer(model, 16000.0f)
|
private val recognizer = Recognizer(model, 16000.0f)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
@ -23,17 +29,16 @@ class STT {
|
||||||
recognizer.partialResult
|
recognizer.partialResult
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun closeModel() {
|
fun closeModel() {
|
||||||
recognizer.close()
|
recognizer.close()
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private fun getModelPath(): String {
|
private fun getModelPath() = Paths.get(
|
||||||
return Paths.get(
|
javaClass.getResource("/STT-model")!!.toURI()
|
||||||
javaClass.getResource("STT-model")!!.toURI()
|
).absolutePathString()
|
||||||
).toFile().absolutePath
|
|
||||||
}
|
|
||||||
|
|
||||||
//TODO: Install model into dir from resource link
|
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
package processing
|
||||||
|
|
||||||
|
import ai.djl.Application
|
||||||
|
import ai.djl.Model
|
||||||
|
import ai.djl.huggingface.tokenizers.Encoding
|
||||||
|
import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer;
|
||||||
|
import ai.djl.huggingface.translator.TextEmbeddingTranslator
|
||||||
|
import ai.djl.huggingface.translator.TextEmbeddingTranslatorFactory
|
||||||
|
import ai.djl.huggingface.zoo.HfModelZoo
|
||||||
|
import ai.djl.ndarray.NDArray
|
||||||
|
import ai.djl.ndarray.NDList
|
||||||
|
import ai.djl.ndarray.NDManager
|
||||||
|
import ai.djl.repository.zoo.Criteria
|
||||||
|
import ai.djl.training.util.ProgressBar
|
||||||
|
import ai.djl.translate.DeferredTranslatorFactory
|
||||||
|
import java.nio.file.Paths
|
||||||
|
import kotlin.math.sqrt
|
||||||
|
|
||||||
|
/**
 * Compares sentences by the cosine similarity of embeddings produced by the model found
 * under the `/Semantic-model` classpath resource.
 *
 * The instance holds DJL resources (manager, loaded model, predictor, tokenizer); call
 * [close] (or use Kotlin's `use { }`) once it is no longer needed.
 */
class SemanticSimilarity : AutoCloseable {
    val manager = NDManager.newBaseManager()

    // Held separately so close() can release the loaded model as well as its predictor.
    private val loadedModel = getCriteria().loadModel()
    val model = loadedModel.newPredictor()
    val tokenizer = HuggingFaceTokenizer.newInstance("sentence-transformers/all-mpnet-base-v2")

    /**
     * Builds the model-loading criteria: NDList in, NDList out, model files taken from the
     * `/Semantic-model` resource directory, with a progress bar while loading.
     *
     * @throws IllegalStateException if the resource directory is not on the classpath.
     */
    fun getCriteria(): Criteria<NDList, NDList> {
        val modelLocation = checkNotNull(javaClass.getResource("/Semantic-model")) {
            "Resource directory /Semantic-model was not found on the classpath"
        }
        return Criteria.builder()
            .setTypes(NDList::class.java, NDList::class.java)
            .optModelPath(Paths.get(modelLocation.toURI()))
            .optProgress(ProgressBar())
            .build()
    }

    /**
     * Embeds [sentence] and every entry of [list], then prints the cosine similarity of
     * [sentence] against each entry, in list order.
     */
    fun compareSentenceToList(sentence: String, list: List<String>) {
        val sentenceEmbedding = generateEmbedding(sentence)
        val similarities = list.map { candidate ->
            candidate to cosineSimilarity(sentenceEmbedding, generateEmbedding(candidate))
        }

        for ((candidate, score) in similarities) {
            println("Similarity between '$sentence' and '$candidate': $score")
        }
    }

    /**
     * Tokenizes [input], runs it through the model and returns the pooled embedding.
     *
     * The first model output is averaged over axis 1 (presumably the token axis - confirm
     * against the model's output layout) and then L2-normalised along axis 1. The returned
     * array keeps its leading batch dimension of 1.
     */
    fun generateEmbedding(input: String): NDArray {
        val encoded = tokenizer.encode(input).toNDList(manager, false)
        // Add a leading batch dimension to each tensor before handing it to the predictor.
        val inputIds = encoded[0].expandDims(0)
        val attentionMask = encoded[1].expandDims(0)

        val output = model.predict(NDList(inputIds, attentionMask))
        return output[0].mean(intArrayOf(1)).normalize(2.0, 1)
    }

    /** Releases the predictor, the loaded model, the tokenizer and the NDManager. */
    override fun close() {
        model.close()
        loadedModel.close()
        tokenizer.close()
        manager.close()
    }

    companion object {

        /**
         * Cosine similarity of two embeddings that each carry a leading dimension of size 1.
         *
         * Both inputs are squeezed to vectors first, and the dot product is taken between
         * those vectors so it uses the same operands as the two norms.
         *
         * @return the similarity, or 0 when either vector has zero length (the ratio would
         * otherwise be NaN).
         */
        fun cosineSimilarity(input: NDArray, command: NDArray): Float {
            val inputVec = input.squeeze(0)
            val commandVec = command.squeeze(0)

            val dotProduct = inputVec.dot(commandVec).getFloat()
            val denominator = calculateNorm(inputVec) * calculateNorm(commandVec)

            return if (denominator == 0f) 0f else dotProduct / denominator
        }

        /** Euclidean (L2) norm of a one-dimensional [array]. */
        fun calculateNorm(array: NDArray) = sqrt(array.dot(array).getFloat())
    }
}
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
package screens
|
||||||
|
|
||||||
|
import Microphone
|
||||||
|
import androidx.compose.desktop.ui.tooling.preview.Preview
|
||||||
|
import androidx.compose.foundation.layout.Row
|
||||||
|
import androidx.compose.material.Button
|
||||||
|
import androidx.compose.material.Icon
|
||||||
|
import androidx.compose.material.MaterialTheme
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.filled.PlayArrow
|
||||||
|
import androidx.compose.material.icons.filled.Stop
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import androidx.compose.runtime.mutableStateOf
|
||||||
|
import androidx.compose.runtime.remember
|
||||||
|
import androidx.compose.runtime.saveable.rememberSaveable
|
||||||
|
import androidx.compose.ui.graphics.vector.ImageVector
|
||||||
|
import cafe.adriel.voyager.core.screen.Screen
|
||||||
|
import kotlinx.coroutines.DelicateCoroutinesApi
|
||||||
|
import kotlinx.coroutines.GlobalScope
|
||||||
|
import kotlinx.coroutines.async
|
||||||
|
import androidx.compose.runtime.getValue
|
||||||
|
import androidx.compose.runtime.setValue
|
||||||
|
|
||||||
|
/**
 * Home screen. For now it renders only an empty [Row] inside a [MaterialTheme].
 */
class Home : Screen {

    @Preview
    @Composable
    override fun Content() {
        MaterialTheme {
            Row(content = {})
        }
    }
}
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
package screens
|
||||||
|
|
||||||
|
/** Empty placeholder class in the `screens` package; it has no members yet. */
class Profiles
|
||||||
|
|
@ -0,0 +1,58 @@
|
||||||
|
package screens
|
||||||
|
|
||||||
|
import androidx.compose.foundation.layout.Column
|
||||||
|
import androidx.compose.foundation.layout.Row
|
||||||
|
import androidx.compose.material.Button
|
||||||
|
import androidx.compose.material.Icon
|
||||||
|
import androidx.compose.material.Text
|
||||||
|
import androidx.compose.material.icons.Icons
|
||||||
|
import androidx.compose.material.icons.filled.Settings
|
||||||
|
import androidx.compose.runtime.Composable
|
||||||
|
import cafe.adriel.voyager.core.screen.Screen
|
||||||
|
import cafe.adriel.voyager.navigator.LocalNavigator
|
||||||
|
import cafe.adriel.voyager.navigator.currentOrThrow
|
||||||
|
import java.io.FileInputStream
|
||||||
|
import java.util.Properties
|
||||||
|
|
||||||
|
/**
 * Settings screen: lists every entry of [userProperties] as a key/value row.
 */
class Settings : Screen {

    @Composable
    override fun Content() {
        Column {
            for ((key, value) in userProperties) {
                Row { //TODO: Make look nice
                    Text("$key")
                    Text("$value") //TODO: This should be a different input element dependent on expected dtype (maybe use prefixes to determine)
                }
            }
        }
    }

    companion object {
        /** User settings, loaded once from `/user.properties` when the companion is initialised. */
        val userProperties = getProperties()

        /**
         * Button that opens the settings screen on the navigator obtained from [LocalNavigator].
         *
         * Does nothing when the settings screen is already on top, so repeated clicks do
         * not stack duplicate screens.
         */
        @Composable
        fun settingsButton() {
            val navigator = LocalNavigator.currentOrThrow
            Button(
                onClick = {
                    if (navigator.lastItem !is Settings) {
                        navigator.push(Settings())
                    }
                }
            ) {
                Icon(
                    Icons.Filled.Settings,
                    contentDescription = "Settings"
                )
            }
        }

        /**
         * Loads `/user.properties` from the classpath.
         *
         * Reads through a resource stream (closed after loading) rather than a file path,
         * so it also works when the path contains URL-encoded characters or the resource
         * is packaged inside a jar.
         *
         * @throws IllegalStateException if the resource is not on the classpath.
         */
        fun getProperties(): Properties {
            val stream = checkNotNull(javaClass.getResourceAsStream("/user.properties")) {
                "Resource /user.properties was not found on the classpath"
            }
            return Properties().apply { stream.use { load(it) } }
        }
    }
}
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
capture_duration=5
|
||||||
Loading…
Reference in New Issue