Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ build
hs_err_pid*
replay_pid*

models/*.gguf
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes sure we don't accidentally check in a chonker .gguf file.

src/main/cpp/de_kherud_llama_*.h
src/main/resources/**/*.so
src/main/resources/**/*.dylib
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@ There are multiple [examples](src/test/java/examples). Make sure to set `model.h
```bash
mvn exec:java -Dexec.mainClass="examples.MainExample" -Dmodel.home="/path/to/models" -Dmodel.name="codellama-13b.Q5_K_M.gguf"
```
Note: if your model is in the `models` directory, then you can omit the `-Dmodel.home` property.

You can also run some integration tests, which will automatically download a model to `model.home`:
You can also run some integration tests, which will automatically download a model to the `models` directory:

```bash
mvn verify -Dmodel.home=/path/to/models
mvn verify
```

### No Setup required
Expand Down
5 changes: 4 additions & 1 deletion build-args.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ else()
set(LLAMA_METAL_DEFAULT OFF)
endif()

# general
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm genuinely not sure why this caused issues. If you have ideas or alternative approaches, do let me know.


# instruction set specific
if (LLAMA_NATIVE)
set(INS_ENB OFF)
Expand Down Expand Up @@ -633,4 +636,4 @@ if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
endif()
if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
add_compile_definitions(_BSD_SOURCE)
endif()
endif()
3 changes: 3 additions & 0 deletions models/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Local Model Directory
This directory contains models which will be automatically downloaded
for use in java-llama.cpp's unit tests.
4 changes: 3 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
<junit.version>4.13.1</junit.version>
<test.plugin.version>3.2.3</test.plugin.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<integration.test.model>mistral-7b-instruct-v0.2.Q5_K_S.gguf</integration.test.model>
<model.home>${project.basedir}/models</model.home>
<integration.test.model>mistral-7b-instruct-v0.2.Q2_K.gguf</integration.test.model>
<integration.test.model.url>https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/${integration.test.model}</integration.test.model.url>
</properties>

Expand Down Expand Up @@ -110,6 +111,7 @@
<systemPropertyVariables>
<propertyName>model.home</propertyName>
<integration.test.model>${integration.test.model}</integration.test.model>
<model.home>${model.home}</model.home>
</systemPropertyVariables>
</configuration>
<executions>
Expand Down
6 changes: 4 additions & 2 deletions src/test/java/de/kherud/llama/LlamaModelIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ public void testGenerateGrammar() {
String output = sb.toString();

Assert.assertTrue(output.matches("[ab]+"));
Assert.assertEquals(nPredict, model.encode(output).length);
int generated = model.encode(output).length;
Assert.assertTrue(generated > 0 && generated <= nPredict);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suppose the number-of-tokens-to-generate parameter (`nPredict`) is an upper bound rather than a guarantee. The output is consistently off by 1 with the 2-bit quantization.

}

@Test
Expand Down Expand Up @@ -126,7 +127,8 @@ public void testCompleteGrammar() {
.setNPredict(nPredict);
String output = model.complete("", params);
Assert.assertTrue(output.matches("[ab]+"));
Assert.assertEquals(nPredict, model.encode(output).length);
int generated = model.encode(output).length;
Assert.assertTrue(generated > 0 && generated <= nPredict);
}

@Test
Expand Down
4 changes: 4 additions & 0 deletions src/test/java/de/kherud/llama/ModelResolver.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package de.kherud.llama;

import java.io.File;
import java.nio.file.Paths;


Expand All @@ -22,6 +23,9 @@ public enum ModelResolver {
public String resolve() {
String ret = System.getProperty(systemPropertyName);
if(ret == null) {
if(new File("models").exists()) {
return "models";
}
throw new IllegalArgumentException(String.format(errorMessage, systemPropertyName));
}
return ret;
Expand Down