Compare commits

...

2 Commits

11 changed files with 324 additions and 54 deletions

View File

@ -1,7 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" project-jdk-name="17" project-jdk-type="JavaSDK">
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" project-jdk-name="21" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -2,5 +2,6 @@
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
<mapping directory="$PROJECT_DIR$/src/main/resources/Semantic-model" vcs="Git" />
</component>
</project>

View File

@ -16,18 +16,49 @@ repositories {
}
dependencies {
    // Note, if you develop a library, you should use compose.desktop.common.
    // compose.desktop.currentOs should be used in launcher-sourceSet
    // (in a separate module for demo project and in testMain).
    // With compose.desktop.common you will also lose @Preview functionality

    // Versions shared by several artifacts are declared once so the artifacts cannot drift apart.
    val djlVersion = "0.30.0"
    val jnaVersion = "5.15.0"
    val voyagerVersion = "1.1.0-beta02"

    // Core
    implementation(compose.desktop.currentOs)

    // Material Design
    implementation(compose.materialIconsExtended)

    // Speech to Text
    implementation("com.alphacephei:vosk:0.3.45")

    // Java Native Access (declared once, through the shared version variable)
    implementation("net.java.dev.jna:jna:$jnaVersion")
    implementation("net.java.dev.jna:jna-platform:$jnaVersion")

    // Coroutines
    implementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.9.0")

    // Voyager
    // Navigator
    implementation("cafe.adriel.voyager:voyager-navigator:$voyagerVersion")
    // Screen Model
    implementation("cafe.adriel.voyager:voyager-screenmodel:$voyagerVersion")
    // BottomSheetNavigator
    implementation("cafe.adriel.voyager:voyager-bottom-sheet-navigator:$voyagerVersion")
    // TabNavigator
    implementation("cafe.adriel.voyager:voyager-tab-navigator:$voyagerVersion")
    // Transitions
    implementation("cafe.adriel.voyager:voyager-transitions:$voyagerVersion")

    // Deep Java Library
    implementation("ai.djl:api:$djlVersion")
    implementation("ai.djl.huggingface:tokenizers:$djlVersion")
    implementation("ai.djl.pytorch:pytorch-engine:$djlVersion")
    implementation("ai.djl.pytorch:pytorch-jni:2.4.0-$djlVersion")

    // Semantic Kernel
    implementation("com.microsoft.semantic-kernel:semantickernel-api:1.3.0")

    // SLF4J
    implementation("org.slf4j:slf4j-api:2.0.16")
}
compose.desktop {

View File

@ -1,31 +1,92 @@
import androidx.compose.desktop.ui.tooling.preview.Preview
import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button
import androidx.compose.material.MaterialTheme
import androidx.compose.material.Text
import androidx.compose.material.Icon
import androidx.compose.material.Scaffold
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.ArrowBackIosNew
import androidx.compose.material.icons.filled.PlayArrow
import androidx.compose.material.icons.filled.Stop
import androidx.compose.runtime.Composable
import androidx.compose.runtime.getValue
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.saveable.rememberSaveable
import androidx.compose.runtime.setValue
import androidx.compose.ui.window.Window
import androidx.compose.ui.window.application
/**
 * Demo composable: a single Material button whose label starts as
 * "Hello, World!" and becomes "Hello, Desktop!" once the button is clicked.
 */
@Composable
@Preview
fun App() {
    // The label is held as remembered state so that it survives recomposition.
    val label = remember { mutableStateOf("Hello, World!") }
    MaterialTheme {
        Button(
            onClick = { label.value = "Hello, Desktop!" },
            content = { Text(label.value) },
        )
    }
}
import cafe.adriel.voyager.navigator.CurrentScreen
import cafe.adriel.voyager.navigator.LocalNavigator
import cafe.adriel.voyager.navigator.Navigator
import cafe.adriel.voyager.navigator.currentOrThrow
import kotlinx.coroutines.DelicateCoroutinesApi
import processing.SemanticSimilarity
import screens.Home
import screens.Settings
/**
 * Application entry point.
 *
 * Runs a hard-coded [SemanticSimilarity] comparison of one sentence against four candidate
 * strings, then opens the application window. The window content calls [App] and hosts a
 * Voyager [Navigator] rooted at [Home], wrapped in a [Scaffold] that uses [topBar] as its top bar.
 */
fun main() = application {
// NOTE(review): this comparison sits inside the `application` block, so it runs as part of
// setting up the UI. Presumably a temporary smoke test; confirm, then move or remove it.
val testSentence = "This is a test sentence for semantic similarity"
val testCommands = listOf("This is a test", "This sentence is completely different", "Testing semantic similarity between sentence", "An unrelated statement")
val semanticTester = SemanticSimilarity()
semanticTester.compareSentenceToList(testSentence, testCommands)
Window(onCloseRequest = ::exitApplication) {
App()
// `navigator.size` is handed to the top bar, which shows a back button only when it is > 1.
Navigator(Home()) { navigator ->
Scaffold(
topBar = { topBar(navigator.size) },
content = { CurrentScreen() },
bottomBar = { }
)
}
}
}
/**
 * Lays out the top bar controls in a single [Row]: an optional back button,
 * the capture toggle and the settings button.
 *
 * @param screenCount screen count supplied by the caller; the back button is
 * only emitted when it is greater than one.
 */
@Composable
fun topBar(screenCount: Int) {
    val canGoBack = screenCount > 1
    Row {
        if (canGoBack) backButton()
        captureButton()
        Settings.settingsButton()
    }
}
/**
 * Button that pops the top screen off the navigator found in [LocalNavigator].
 */
@Composable
fun backButton() {
    val nav = LocalNavigator.currentOrThrow
    Button(
        onClick = { nav.pop() },
        content = {
            Icon(Icons.Filled.ArrowBackIosNew, contentDescription = "Go Back")
        },
    )
}
/**
 * Capture duration read from the `capture_duration` user property and passed to
 * `Microphone.startCapture` by [getCaptureOnClick].
 *
 * Fails with a descriptive message, rather than a bare NullPointerException or
 * NumberFormatException, when the property is missing or not an integer.
 */
val captureDuration: Int = Settings.userProperties.getProperty("capture_duration")
    ?.trim()
    ?.toIntOrNull()
    ?: error("user.properties must define an integer 'capture_duration'")
/**
 * Picks the icon for the capture button: the stop icon while capturing,
 * the play icon otherwise.
 */
@Composable
fun getCaptureIcon(capturing: Boolean) = when (capturing) {
    true -> Icons.Filled.Stop
    false -> Icons.Filled.PlayArrow
}
/**
 * Returns the verb describing what the capture button will do next:
 * "Stop" while capturing, "Start" otherwise.
 */
@Composable
fun getCaptureDescription(capturing: Boolean): String {
    return if (capturing) "Stop" else "Start"
}
/**
 * Performs the capture button's click action on [mic]: stops the capture when
 * one is running, otherwise starts one with [captureDuration].
 */
fun getCaptureOnClick(mic: Microphone) {
    if (mic.capturing) {
        mic.stopCapture()
    } else {
        mic.startCapture(captureDuration)
    }
}
/**
 * Toggle button that starts or stops audio capture and shows a matching icon.
 */
@OptIn(DelicateCoroutinesApi::class)
@Composable
fun captureButton() {
    // The microphone must be remembered: changing `isCapturing` recomposes this function,
    // and a fresh Microphone on each pass would make the next click act on a new instance
    // instead of stopping the capture that is already running.
    val mic = remember { Microphone() }
    var isCapturing by rememberSaveable { mutableStateOf(mic.capturing) }
    Button(
        onClick = {
            getCaptureOnClick(mic)
            isCapturing = !isCapturing
        }
    ) {
        Icon(
            imageVector = getCaptureIcon(isCapturing),
            contentDescription = getCaptureDescription(isCapturing) + " audio capture"
        )
    }
}

View File

@ -1,11 +1,17 @@
import kotlinx.coroutines.DelicateCoroutinesApi
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.async
import kotlinx.coroutines.launch
import processing.STT
import javax.sound.sampled.AudioFormat
import javax.sound.sampled.AudioSystem
import javax.sound.sampled.DataLine
import javax.sound.sampled.TargetDataLine
/**
* TODO: Documentation
*/
class Microphone {
var capturing = false
/**
*
*/
@ -13,37 +19,36 @@ class Microphone {
/**
*
*/
private val audioFormat = getFormat()
private val audioFormat = AudioFormat(16000.0f, 16, 1, true, false)
/**
 * Starts capturing audio from `source` and feeds it to speech-to-text on a background coroutine.
 *
 * The loop reads up to one buffer of audio at a time, prints the recognised text and keeps
 * going until [capturing] becomes false. The recogniser is always closed when the loop ends.
 *
 * @param captureDuration size of one read buffer, expressed as a multiple of 32000 bytes
 * (one second of audio at 16 kHz, 16-bit, mono); must be positive.
 * @throws IllegalArgumentException if [captureDuration] is not positive.
 */
@OptIn(DelicateCoroutinesApi::class)
fun startCapture(captureDuration: Int) {
    require(captureDuration > 0) { "captureDuration must be positive, was $captureDuration" }
    // Opening a line that is already open is illegal, so only open it the first time.
    if (!source.isOpen) {
        source.open(audioFormat, source.bufferSize)
    }
    source.start()
    // Set only after the line really started, so a failed start does not leave the flag stuck.
    capturing = true
    // 16000 samples/s * 2 bytes per sample * 1 channel = 32000 bytes per second.
    val buffer = ByteArray(32000 * captureDuration)
    // `launch` rather than `async`: nobody awaits a result, and `async` would silently keep
    // any failure inside the discarded Deferred. IO dispatcher because `read` blocks.
    // NOTE(review): `capturing` is written by callers and read here on another thread;
    // consider marking the property @Volatile.
    GlobalScope.launch(Dispatchers.IO) {
        val stt = STT()
        try {
            while (capturing) {
                val audioBytes = source.read(buffer, 0, buffer.size)
                // A stopped line can return no data; there is nothing to recognise then.
                if (audioBytes > 0) {
                    val result = stt.parseBuffer(buffer, audioBytes)
                    println("Captured Speech: $result")
                    //TODO: Pass onto semantic similarity
                }
            }
        } finally {
            stt.closeModel()
        }
    }
}
/**
 * Stops the running capture: clears [capturing] so the processing loop ends,
 * then stops the audio line. The line stays open.
 */
fun stopCapture() {
    capturing = false
    source.stop()
}

/**
 * Ends any running capture and closes the audio line.
 */
fun closeSource() {
    capturing = false
    source.close()
}

/**
 * Builds the audio format description.
 */
private fun getFormat(): AudioFormat {
    return AudioFormat( //TODO: Get format settings from user settings
        16000.0f, // sample rate in Hz - 16000 Required
        16, // sample size in bits
        2, // channels
        true, // signed
        true // big-endian
    )
}
/**
*
@ -53,9 +58,7 @@ class Microphone {
TargetDataLine::class.java,
audioFormat
)
val source = AudioSystem.getLine(info) as TargetDataLine
source.open(audioFormat)
return source
return AudioSystem.getLine(info) as TargetDataLine
}
}

View File

@ -1,6 +1,10 @@
package processing
import org.vosk.Model
import org.vosk.Recognizer
import java.nio.file.Paths
import kotlin.io.path.absolutePathString
/**
* TODO: Documentation
*/
@ -9,10 +13,12 @@ class STT {
*
*/
private val model = Model(getModelPath())
/**
*
*/
private val recognizer = Recognizer(model, 16000.0f)
/**
*
*/
@ -23,17 +29,16 @@ class STT {
recognizer.partialResult
}
}
/**
 * Releases the resources held by this instance: the recognizer first,
 * then the model it was created from.
 */
fun closeModel() {
    recognizer.close()
    // The model is a separate resource; closing only the recognizer leaves it allocated.
    model.close()
}
/**
 * Resolves the `STT-model` classpath resource to an absolute file-system path.
 *
 * The leading slash makes the lookup start at the classpath root instead of this
 * class's package.
 * NOTE(review): converting the resource URL with `Paths.get` presumably only works while
 * the resources are plain files on disk (not packed inside a jar) - confirm for packaging.
 *
 * @throws IllegalArgumentException if the resource cannot be found.
 */
private fun getModelPath(): String {
    val modelUrl = requireNotNull(javaClass.getResource("/STT-model")) {
        "STT-model resource not found on the classpath"
    }
    return Paths.get(modelUrl.toURI()).absolutePathString()
}
//TODO: Install model into dir from resource link
}

View File

@ -0,0 +1,73 @@
package processing
import ai.djl.Application
import ai.djl.Model
import ai.djl.huggingface.tokenizers.Encoding
import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer;
import ai.djl.huggingface.translator.TextEmbeddingTranslator
import ai.djl.huggingface.translator.TextEmbeddingTranslatorFactory
import ai.djl.huggingface.zoo.HfModelZoo
import ai.djl.ndarray.NDArray
import ai.djl.ndarray.NDList
import ai.djl.ndarray.NDManager
import ai.djl.repository.zoo.Criteria
import ai.djl.training.util.ProgressBar
import ai.djl.translate.DeferredTranslatorFactory
import java.nio.file.Paths
import kotlin.math.sqrt
/**
 * Scores how similar sentences are by comparing embeddings produced by the model stored in
 * the `/Semantic-model` resource directory.
 *
 * The instance holds a model, a predictor, a tokenizer and an [NDManager]; call [close]
 * (or use `use { }`) when it is no longer needed.
 */
class SemanticSimilarity : AutoCloseable {
    val manager = NDManager.newBaseManager()

    // Kept separately so that close() can release the loaded model as well as its predictor.
    private val zooModel = getCriteria().loadModel()

    /** Predictor created from the loaded model (the property name is kept for compatibility). */
    val model = zooModel.newPredictor()

    val tokenizer = HuggingFaceTokenizer.newInstance("sentence-transformers/all-mpnet-base-v2")

    /**
     * Builds the criteria used to load the model from the `/Semantic-model` resource.
     *
     * @throws IllegalArgumentException if the resource cannot be found.
     */
    fun getCriteria(): Criteria<NDList, NDList> {
        val modelDir = requireNotNull(javaClass.getResource("/Semantic-model")) {
            "Semantic-model resource not found on the classpath"
        }
        return Criteria.builder()
            .setTypes(NDList::class.java, NDList::class.java)
            .optModelPath(Paths.get(modelDir.toURI()))
            .optProgress(ProgressBar())
            .build()
    }

    /**
     * Prints the cosine similarity between [sentence] and every entry of [list].
     * The embedding of [sentence] is computed once and reused for all comparisons.
     */
    fun compareSentenceToList(sentence: String, list: List<String>) {
        val sentenceEmbedding = generateEmbedding(sentence)
        val similarities = list.map { candidate ->
            candidate to cosineSimilarity(sentenceEmbedding, generateEmbedding(candidate))
        }
        similarities.forEach { (candidate, score) ->
            println("Similarity between '$sentence' and '$candidate': $score")
        }
    }

    /**
     * Tokenizes [input], runs it through the predictor and returns the first output averaged
     * over axis 1 and L2-normalised along axis 1.
     *
     * NOTE(review): assumes the first output is (batch, tokens, hidden), so the result is a
     * (1, hidden) sentence vector - confirm against the exported model.
     */
    fun generateEmbedding(input: String): NDArray {
        val inputList = tokenizer.encode(input).toNDList(manager, false)
        // The model is fed a batch of one, hence the added leading axis.
        val inputIds = inputList[0].expandDims(0)
        val attentionMask = inputList[1].expandDims(0)
        return model.predict(
            NDList(inputIds, attentionMask)
        )[0].mean(intArrayOf(1)).normalize(2.0, 1)
    }

    /** Releases the predictor, the model, the tokenizer and the manager, in that order. */
    override fun close() {
        model.close()
        zooModel.close()
        tokenizer.close()
        manager.close()
    }

    companion object {
        /**
         * Cosine similarity of two embeddings whose leading axis has size one.
         *
         * @return the dot product divided by the product of the norms, or 0 when either
         * vector has zero length (instead of NaN).
         */
        fun cosineSimilarity(input: NDArray, command: NDArray): Float {
            val inputVec = input.squeeze(0)
            val commandVec = command.squeeze(0)
            val denominator = calculateNorm(inputVec) * calculateNorm(commandVec)
            if (denominator == 0f) return 0f
            // Both operands are the squeezed vectors, matching how the norms are computed.
            return inputVec.dot(commandVec).getFloat() / denominator
        }

        /** Euclidean length of [array]: the square root of its dot product with itself. */
        fun calculateNorm(array: NDArray) = sqrt(array.dot(array).getFloat())
    }
}

View File

@ -0,0 +1,33 @@
package screens
import Microphone
import androidx.compose.desktop.ui.tooling.preview.Preview
import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button
import androidx.compose.material.Icon
import androidx.compose.material.MaterialTheme
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.PlayArrow
import androidx.compose.material.icons.filled.Stop
import androidx.compose.runtime.Composable
import androidx.compose.runtime.mutableStateOf
import androidx.compose.runtime.remember
import androidx.compose.runtime.saveable.rememberSaveable
import androidx.compose.ui.graphics.vector.ImageVector
import cafe.adriel.voyager.core.screen.Screen
import kotlinx.coroutines.DelicateCoroutinesApi
import kotlinx.coroutines.GlobalScope
import kotlinx.coroutines.async
import androidx.compose.runtime.getValue
import androidx.compose.runtime.setValue
/**
 * Start screen of the application. Currently renders only an empty [Row]
 * inside a [MaterialTheme].
 */
class Home : Screen {
    @Composable
    @Preview
    override fun Content() {
        MaterialTheme {
            Row(content = {})
        }
    }
}

View File

@ -0,0 +1,4 @@
package screens
/**
 * Placeholder for the profiles screen; it has no members yet.
 */
class Profiles

View File

@ -0,0 +1,58 @@
package screens
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.Row
import androidx.compose.material.Button
import androidx.compose.material.Icon
import androidx.compose.material.Text
import androidx.compose.material.icons.Icons
import androidx.compose.material.icons.filled.Settings
import androidx.compose.runtime.Composable
import cafe.adriel.voyager.core.screen.Screen
import cafe.adriel.voyager.navigator.LocalNavigator
import cafe.adriel.voyager.navigator.currentOrThrow
import java.io.FileInputStream
import java.util.Properties
/**
 * Settings screen: lists every entry of [userProperties] as a key/value row.
 */
class Settings : Screen {
    @Composable
    override fun Content() {
        Column {
            for ((key, value) in userProperties) {
                Row { //TODO: Make look nice
                    Text("$key")
                    Text("$value") //TODO: This should be a different input element dependent on expected dtype (maybe use prefixes to determine)
                }
            }
        }
    }

    companion object {
        /** User settings loaded once from the `/user.properties` resource. */
        val userProperties = getProperties()

        /**
         * Button that pushes a new [Settings] screen onto the navigator found in [LocalNavigator].
         */
        @Composable
        fun settingsButton() {
            val navigator = LocalNavigator.currentOrThrow
            Button(onClick = {
                navigator.push(Settings())
            }) {
                Icon(
                    Icons.Filled.Settings,
                    // Plain label; the former "screens.Settings" looked like a leftover
                    // from moving the class into the `screens` package.
                    contentDescription = "Settings"
                )
            }
        }

        /**
         * Loads `/user.properties` from the classpath.
         *
         * The resource is read as a stream, so it also works for paths that need URL
         * encoding, and the stream is closed once loading finishes.
         *
         * @throws IllegalStateException if the resource cannot be found.
         */
        fun getProperties(): Properties {
            val stream = checkNotNull(javaClass.getResourceAsStream("/user.properties")) {
                "user.properties not found on the classpath"
            }
            return Properties().apply {
                stream.use { load(it) }
            }
        }
    }
}

View File

@ -0,0 +1 @@
capture_duration=5