UI structure started. SemanticSimilarity implemented. User settings implemented.

This commit is contained in:
Joshua Perry 2024-10-24 23:52:44 +01:00
parent 68ddc43446
commit 6f94f60b15
10 changed files with 314 additions and 66 deletions

View File

@ -2,5 +2,6 @@
<project version="4"> <project version="4">
<component name="VcsDirectoryMappings"> <component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" /> <mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/src/main/resources/Semantic-model" vcs="Git" />
</component> </component>
</project> </project>

View File

@ -16,20 +16,49 @@ repositories {
} }
dependencies { dependencies {
// Note, if you develop a library, you should use compose.desktop.common. val djlVersion = "0.30.0"
// compose.desktop.currentOs should be used in launcher-sourceSet val jnaVersion = "5.15.0"
// (in a separate module for demo project and in testMain). val voyagerVersion = "1.1.0-beta02"
// With compose.desktop.common you will also lose @Preview functionality
// Core
implementation(compose.desktop.currentOs) implementation(compose.desktop.currentOs)
// Material Design
implementation(compose.materialIconsExtended)
// Speech to Text // Speech to Text
implementation("com.alphacephei:vosk:0.3.45") implementation("com.alphacephei:vosk:0.3.45")
//Java Native Access
implementation("net.java.dev.jna:jna:5.15.0") //Java Native Access var capturing = false
implementation("net.java.dev.jna:jna-platform:5.15.0") implementation("net.java.dev.jna:jna:$jnaVersion")
implementation("net.java.dev.jna:jna-platform:$jnaVersion")
//Coroutines //Coroutines
implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.9.0") implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.9.0")
// Voyager
// Navigator
implementation("cafe.adriel.voyager:voyager-navigator:$voyagerVersion")
// Screen Model
implementation("cafe.adriel.voyager:voyager-screenmodel:$voyagerVersion")
// BottomSheetNavigator
implementation("cafe.adriel.voyager:voyager-bottom-sheet-navigator:$voyagerVersion")
// TabNavigator
implementation("cafe.adriel.voyager:voyager-tab-navigator:$voyagerVersion")
// Transitions
implementation("cafe.adriel.voyager:voyager-transitions:$voyagerVersion")
// Deep Java Library
implementation("ai.djl:api:$djlVersion")
implementation("ai.djl.huggingface:tokenizers:$djlVersion")
implementation("ai.djl.pytorch:pytorch-engine:$djlVersion")
implementation("ai.djl.pytorch:pytorch-jni:2.4.0-$djlVersion")
// Semantic Kernel
implementation("com.microsoft.semantic-kernel:semantickernel-api:1.3.0")
// SLF4J
implementation("org.slf4j:slf4j-api:2.0.16")
} }
compose.desktop { compose.desktop {

View File

@ -1,37 +1,92 @@
import androidx.compose.desktop.ui.tooling.preview.Preview import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button import androidx.compose.material.Button
import androidx.compose.material.MaterialTheme import androidx.compose.material.Icon
import androidx.compose.material.Text import androidx.compose.material.Scaffold
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.ArrowBackIosNew
import androidx.compose.material.icons.filled.PlayArrow
import androidx.compose.material.icons.filled.Stop
import androidx.compose.runtime.Composable import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember import androidx.compose.runtime.saveable.rememberSaveable
import androidx.compose.runtime.setValue import androidx.compose.runtime.setValue
import androidx.compose.ui.window.Window import androidx.compose.ui.window.Window
import androidx.compose.ui.window.application import androidx.compose.ui.window.application
import cafe.adriel.voyager.navigator.CurrentScreen
import cafe.adriel.voyager.navigator.LocalNavigator
import cafe.adriel.voyager.navigator.Navigator
import cafe.adriel.voyager.navigator.currentOrThrow
import kotlinx.coroutines.DelicateCoroutinesApi import kotlinx.coroutines.DelicateCoroutinesApi
import kotlinx.coroutines.GlobalScope import processing.SemanticSimilarity
import kotlinx.coroutines.async import screens.Home
import kotlinx.coroutines.runBlocking import screens.Settings
/**
 * Application entry point.
 *
 * Runs a hard-coded [SemanticSimilarity] comparison (which prints its scores), then opens the
 * main window. The window hosts a Voyager [Navigator] rooted at [Home]; the current screen is
 * rendered inside a [Scaffold] whose top bar receives the navigator's stack size.
 */
fun main() = application {
    // Hard-coded sample comparison; the scores are only printed, not used by the UI.
    SemanticSimilarity().compareSentenceToList(
        sentence = "This is a test sentence for semantic similarity",
        list = listOf(
            "This is a test",
            "This sentence is completely different",
            "Testing semantic similarity between sentence",
            "An unrelated statement",
        ),
    )

    Window(onCloseRequest = ::exitApplication) {
        Navigator(Home()) { screenStack ->
            Scaffold(
                topBar = { topBar(screenStack.size) },
                content = { CurrentScreen() },
                bottomBar = { },
            )
        }
    }
}
/**
 * Top bar shared by every screen: an optional back button followed by the capture toggle
 * and the settings button.
 *
 * @param screenCount value the caller supplies for the navigation stack size; the back
 * button is only shown when it is greater than 1.
 */
@Composable
fun topBar(screenCount: Int) {
    val showBack = screenCount > 1
    Row {
        if (showBack) backButton()
        captureButton()
        Settings.settingsButton()
    }
}
/**
 * Button that pops the top screen off the navigator obtained from [LocalNavigator].
 *
 * Must be composed inside a Navigator, because `currentOrThrow` is used to resolve it.
 */
@Composable
fun backButton() {
    val nav = LocalNavigator.currentOrThrow
    Button(
        onClick = { nav.pop() },
        content = {
            Icon(imageVector = Icons.Filled.ArrowBackIosNew, contentDescription = "Go Back")
        },
    )
}
/**
 * Capture duration handed to `Microphone.startCapture`, read once from the
 * `capture_duration` user property (presumably seconds — confirm against Microphone).
 *
 * A missing, non-numeric or non-positive value falls back to 5 instead of throwing during
 * top-level initialization, which would otherwise abort start-up.
 */
val captureDuration: Int = Settings.userProperties.getProperty("capture_duration")
    ?.trim()
    ?.toIntOrNull()
    ?.takeIf { it > 0 }
    ?: 5
/**
 * Picks the icon for the capture toggle.
 *
 * @param capturing whether audio capture is currently considered active.
 * @return the "stop" icon while capturing, otherwise the "play" icon.
 */
@Composable
fun getCaptureIcon(capturing: Boolean) = when {
    capturing -> Icons.Filled.Stop
    else -> Icons.Filled.PlayArrow
}
/**
 * Picks the verb describing what pressing the capture toggle will do.
 *
 * @param capturing whether audio capture is currently considered active.
 * @return `"Stop"` while capturing, otherwise `"Start"`.
 */
@Composable
fun getCaptureDescription(capturing: Boolean) = when {
    capturing -> "Stop"
    else -> "Start"
}
/**
 * Toggles capture on [mic]: stops it when `mic.capturing` is set, otherwise starts it for
 * [captureDuration].
 *
 * The decision is based solely on the state of the instance passed in, so the same
 * [Microphone] must be supplied on every click for a "stop" to reach the capture that was
 * started earlier.
 *
 * @param mic microphone whose capture state is toggled.
 */
fun getCaptureOnClick(mic: Microphone) {
    if (mic.capturing) {
        mic.stopCapture()
    } else {
        mic.startCapture(captureDuration)
    }
}
@OptIn(DelicateCoroutinesApi::class) @OptIn(DelicateCoroutinesApi::class)
@Composable @Composable
@Preview fun captureButton() {
fun App() {
var text by remember { mutableStateOf("Hello, World!") }
MaterialTheme {
Button(onClick = {
val mic = Microphone() val mic = Microphone()
GlobalScope.async { mic.startCapture() } var isCapturing by rememberSaveable { mutableStateOf(mic.capturing) }
}) { Button(
Text(text) onClick = {
getCaptureOnClick(mic)
isCapturing = !isCapturing
} }
) {
Icon(
imageVector = getCaptureIcon(isCapturing),
contentDescription = getCaptureDescription(isCapturing) + " audio capture"
)
} }
} }
fun main() = application {
Window(onCloseRequest = ::exitApplication) {
App()
}
}

View File

@ -1,14 +1,17 @@
import kotlinx.coroutines.coroutineScope import kotlinx.coroutines.DelicateCoroutinesApi
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.async
import javax.sound.sampled.AudioFormat import javax.sound.sampled.AudioFormat
import javax.sound.sampled.AudioSystem import javax.sound.sampled.AudioSystem
import javax.sound.sampled.DataLine import javax.sound.sampled.DataLine
import javax.sound.sampled.TargetDataLine import javax.sound.sampled.TargetDataLine
import kotlinx.coroutines.launch import processing.STT
/** /**
* TODO: Documentation * TODO: Documentation
*/ */
class Microphone { class Microphone {
var capturing = false
/** /**
* *
*/ */
@ -16,25 +19,27 @@ class Microphone {
/** /**
* *
*/ */
private val audioFormat = getFormat() private val audioFormat = AudioFormat(16000.0f, 16, 1, true, false)
/** /**
* *
*/ */
suspend fun startCapture() { @OptIn(DelicateCoroutinesApi::class)
fun startCapture(captureDuration: Int) {
capturing = true
source.open(audioFormat, source.bufferSize) source.open(audioFormat, source.bufferSize)
source.start() source.start()
val buffer = ByteArray(160000) val buffer = ByteArray(32000 * captureDuration) // sampleRate * sampleSizeInBits * channels * duration
//TODO: Start loop in own coroutine GlobalScope.async {
coroutineScope {
launch {
while (true) {
val audioBytes = source.read(buffer, 0, buffer.size)
val stt = STT() val stt = STT()
while (capturing) {
val audioBytes = source.read(buffer, 0, buffer.size)
val result = stt.parseBuffer(buffer, audioBytes) val result = stt.parseBuffer(buffer, audioBytes)
println("Captured Speech: $result") println("Captured Speech: $result")
//TODO: Pass onto semantic similarity
} }
} stt.closeModel()
} }
} }
/** /**
@ -42,24 +47,8 @@ class Microphone {
*/ */
fun stopCapture() { fun stopCapture() {
source.stop() source.stop()
}
/**
*
*/
fun closeSource() {
source.close() source.close()
} capturing = false
/**
*
*/
private fun getFormat(): AudioFormat {
return AudioFormat(
16000.0f, // 16000 Required
16,
1,
true,
false
)
} }
/** /**
* *

View File

@ -1,6 +1,10 @@
package processing
import org.vosk.Model import org.vosk.Model
import org.vosk.Recognizer import org.vosk.Recognizer
import java.nio.file.Paths import java.nio.file.Paths
import kotlin.io.path.absolutePathString
/** /**
* TODO: Documentation * TODO: Documentation
*/ */
@ -9,10 +13,12 @@ class STT {
* *
*/ */
private val model = Model(getModelPath()) private val model = Model(getModelPath())
/** /**
* *
*/ */
private val recognizer = Recognizer(model, 16000.0f) private val recognizer = Recognizer(model, 16000.0f)
/** /**
* *
*/ */
@ -23,17 +29,16 @@ class STT {
recognizer.partialResult recognizer.partialResult
} }
} }
fun closeModel() { fun closeModel() {
recognizer.close() recognizer.close()
} }
/** /**
* *
*/ */
private fun getModelPath(): String { private fun getModelPath() = Paths.get(
return Paths.get( javaClass.getResource("/STT-model")!!.toURI()
javaClass.getResource("STT-model")!!.toURI() ).absolutePathString()
).toFile().absolutePath
}
//TODO: Install model into dir from resource link
} }

View File

@ -0,0 +1,73 @@
package processing
import ai.djl.Application
import ai.djl.Model
import ai.djl.huggingface.tokenizers.Encoding
import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer;
import ai.djl.huggingface.translator.TextEmbeddingTranslator
import ai.djl.huggingface.translator.TextEmbeddingTranslatorFactory
import ai.djl.huggingface.zoo.HfModelZoo
import ai.djl.ndarray.NDArray
import ai.djl.ndarray.NDList
import ai.djl.ndarray.NDManager
import ai.djl.repository.zoo.Criteria
import ai.djl.training.util.ProgressBar
import ai.djl.translate.DeferredTranslatorFactory
import java.nio.file.Paths
import kotlin.math.sqrt
/**
 * Scores how similar a sentence is to a list of candidate strings by comparing embeddings
 * produced by the model found under the `/Semantic-model` classpath resource.
 *
 * NOTE(review): nothing in this class closes [manager], [model] or [tokenizer]; confirm
 * whether instances are expected to live for the whole process.
 */
class SemanticSimilarity {
    /** Manager used to allocate the tokenizer output tensors. */
    val manager = NDManager.newBaseManager()

    /** Predictor created from the model described by [getCriteria]. */
    val model = getCriteria().loadModel().newPredictor()

    /** Tokenizer created for the `sentence-transformers/all-mpnet-base-v2` identifier. */
    val tokenizer = HuggingFaceTokenizer.newInstance("sentence-transformers/all-mpnet-base-v2")

    /**
     * Builds the model-loading criteria: raw [NDList] in and out, model files taken from the
     * `/Semantic-model` resource directory, with a progress bar attached.
     *
     * @throws NullPointerException if the `/Semantic-model` resource cannot be found.
     */
    fun getCriteria(): Criteria<NDList, NDList> = Criteria.builder()
        .setTypes(NDList::class.java, NDList::class.java)
        .optModelPath(
            Paths.get(
                javaClass.getResource("/Semantic-model")!!.toURI()
            )
        )
        .optProgress(ProgressBar())
        .build()

    /**
     * Embeds [sentence] once, scores it against every entry of [list] and prints one line per
     * entry. An empty [list] prints nothing.
     *
     * @param sentence text to compare.
     * @param list candidate strings to compare [sentence] against.
     */
    fun compareSentenceToList(sentence: String, list: List<String>) {
        val sentenceEmbedding = generateEmbedding(sentence)
        // All scores are computed before anything is printed.
        val similarities = list.map { candidate ->
            candidate to cosineSimilarity(sentenceEmbedding, generateEmbedding(candidate))
        }
        similarities.forEach { (candidate, score) ->
            println("Similarity between '$sentence' and '$candidate': $score")
        }
    }

    /**
     * Produces an embedding for [input].
     *
     * The text is tokenized, the first two tensors of the encoding (treated as input ids and
     * attention mask) each get a leading axis added, and the pair is passed to [model]. The
     * first output is averaged over axis 1 and then normalized with `normalize(2.0, 1)`.
     *
     * @param input text to embed.
     * @return the pooled, normalized embedding; [cosineSimilarity] expects it to still carry
     * a leading axis of size 1 (assumed from the `squeeze(0)` there — TODO confirm shape).
     */
    fun generateEmbedding(input: String): NDArray {
        val encoded = tokenizer.encode(input).toNDList(manager, false)
        val inputIds = encoded[0].expandDims(0)
        val attentionMask = encoded[1].expandDims(0)
        val output = model.predict(NDList(inputIds, attentionMask))[0]
        return output.mean(intArrayOf(1)).normalize(2.0, 1)
    }

    companion object {
        /**
         * Cosine similarity between two embeddings.
         *
         * Both arguments have axis 0 squeezed away first; the dot product and both norms are
         * then computed on those squeezed vectors so all three terms use the same operands.
         *
         * @param input first embedding.
         * @param command second embedding.
         * @return dot product divided by the product of the norms, or `0f` when either norm
         * is zero (avoids a NaN result from dividing by zero).
         */
        fun cosineSimilarity(input: NDArray, command: NDArray): Float {
            val inputVec = input.squeeze(0)
            val commandVec = command.squeeze(0)

            val normProduct = calculateNorm(inputVec) * calculateNorm(commandVec)
            if (normProduct == 0f) return 0f

            return inputVec.dot(commandVec).getFloat() / normProduct
        }

        /**
         * Euclidean norm of [array], computed as the square root of its dot product with
         * itself.
         */
        fun calculateNorm(array: NDArray) = sqrt(array.dot(array).getFloat())
    }
}

View File

@ -0,0 +1,33 @@
package screens
import Microphone
import androidx.compose.desktop.ui.tooling.preview.Preview
import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button
import androidx.compose.material.Icon
import androidx.compose.material.MaterialTheme
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.PlayArrow
import androidx.compose.material.icons.filled.Stop
import androidx.compose.runtime.Composable
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.saveable.rememberSaveable
import androidx.compose.ui.graphics.vector.ImageVector
import cafe.adriel.voyager.core.screen.Screen
import kotlinx.coroutines.DelicateCoroutinesApi
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.async
import androidx.compose.runtime.getValue
import androidx.compose.runtime.setValue
/**
 * Root screen handed to the navigator at start-up.
 *
 * Currently renders only an empty [Row] wrapped in [MaterialTheme].
 */
class Home: Screen {
    @Composable
    @Preview
    override fun Content() = MaterialTheme {
        Row(content = { })
    }
}

View File

@ -0,0 +1,4 @@
package screens
/**
 * Placeholder for the profiles screen; it has no members yet.
 */
class Profiles

View File

@ -0,0 +1,58 @@
package screens
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button
import androidx.compose.material.Icon
import androidx.compose.material.Text
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Settings
import androidx.compose.runtime.Composable
import cafe.adriel.voyager.core.screen.Screen
import cafe.adriel.voyager.navigator.LocalNavigator
import cafe.adriel.voyager.navigator.currentOrThrow
import java.io.FileInputStream
import java.util.Properties
/**
 * Settings screen: lists every entry of [userProperties] as a key/value row.
 *
 * The companion object owns the loaded properties and the button that navigates here.
 */
class Settings: Screen {
    /** Renders one [Row] per user property, showing its key followed by its value. */
    @Composable
    override fun Content() {
        Column {
            for ((key, value) in userProperties) {
                Row { //TODO: Make look nice
                    Text("$key")
                    Text("$value") //TODO: This should be a different input element dependent on expected dtype (maybe use prefixes to determine)
                }
            }
        }
    }

    companion object {
        /** Classpath location of the bundled user properties file. */
        private const val PROPERTIES_RESOURCE = "/user.properties"

        /** User properties, loaded once when the companion object is initialized. */
        val userProperties = getProperties()

        /**
         * Button that pushes a new [Settings] screen onto the navigator obtained from
         * [LocalNavigator]. Must be composed inside a Navigator.
         */
        @Composable
        fun settingsButton() {
            val navigator = LocalNavigator.currentOrThrow
            Button(onClick = {
                navigator.push(Settings())
            }) {
                Icon(
                    Icons.Filled.Settings,
                    // Human-readable label; the previous value was the qualified class name.
                    contentDescription = "Settings"
                )
            }
        }

        /**
         * Loads the bundled user properties from the classpath.
         *
         * The resource is read as a stream rather than through its file path, so it also
         * works when the resource is not a plain file on disk (e.g. packaged in an archive)
         * or when its URL contains escaped characters. The stream is closed after loading.
         *
         * @return the properties read from [PROPERTIES_RESOURCE].
         * @throws IllegalStateException if the resource is not on the classpath.
         */
        fun getProperties(): Properties {
            val stream = checkNotNull(Settings::class.java.getResourceAsStream(PROPERTIES_RESOURCE)) {
                "Resource $PROPERTIES_RESOURCE not found on the classpath"
            }
            return stream.use { input ->
                Properties().apply { load(input) }
            }
        }
    }
}

View File

@ -0,0 +1 @@
capture_duration=5