Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
afd99b4
feat: prepare `DiffNode` for storing `endif` lines
eugen-shulimov Oct 26, 2024
4bed4b5
feat: prepare `VariationTreeNode` for storing `endif` lines
eugen-shulimov Oct 26, 2024
22d5dea
feat: add a getter for the `endif` line in `VariationNode`
eugen-shulimov Oct 26, 2024
b694ce5
fix: handle `endif` lines in deep copies of variation trees
eugen-shulimov Oct 26, 2024
02f7340
feat: create an unparser for variation trees
eugen-shulimov Oct 28, 2024
e9fb7c5
feat: store `endif` lines when parsing variation diffs
eugen-shulimov Oct 30, 2024
92b9247
feat: create an unparser for variation diffs
eugen-shulimov Nov 1, 2024
e2934fe
feat: create a method that projects text diffs
eugen-shulimov Nov 2, 2024
2c18279
refactor: move some code for readability
eugen-shulimov Nov 4, 2024
a8f5306
test: create tests for VariationUnparser
eugen-shulimov Nov 7, 2024
af4eb0f
feat: create a dataset for my bachelor thesis
eugen-shulimov Nov 27, 2024
5408c72
feat: create an experiment for testing the unparser
eugen-shulimov Nov 27, 2024
b5a2a8c
fix: fix the analysis in the unparser experiment
eugen-shulimov Nov 28, 2024
870b76a
feat: change the dataset of the unparse experiment
eugen-shulimov Dec 3, 2024
5bdb8f1
feat: change some things in the analysis
eugen-shulimov Dec 5, 2024
efb4b2c
feat: add error reporting to UnparseAnalysis
pmbittner Dec 5, 2024
dee62f8
fix: store endifs depending on the time
eugen-shulimov Dec 21, 2024
dd54859
test: add an example of failing to unparse
eugen-shulimov Dec 21, 2024
e312978
fix: add new lines to the unparsed code
eugen-shulimov Dec 21, 2024
f743e2c
feat: rework the unparse experiment evaluation
eugen-shulimov Dec 21, 2024
53b82e1
test: add a test for comparing unparsed diffs semantically
eugen-shulimov Dec 21, 2024
ff46999
fix: fix removeWhitespace for diffs
eugen-shulimov Feb 10, 2025
6ac3440
test: fail tests if an exception is thrown
ibbem Aug 9, 2025
d18386f
test: use asserts instead of manually checking stdout
ibbem Aug 9, 2025
0a35f62
test: remove unnecessary wrappers around unparser
ibbem Aug 9, 2025
b9fa82b
test: reuse directory constants in the unparsing tests
ibbem Aug 9, 2025
bb59058
test: improve the variable names in the unparse tests
ibbem Aug 9, 2025
79b0a27
test: factor out duplicate code in the unparser tests
ibbem Aug 9, 2025
6bcde64
test: make all unparser test helper methods private
ibbem Aug 9, 2025
84e5eab
test: refactor the unparser test case sources
ibbem Aug 9, 2025
69b406f
test: split different parse options into separate unparse tests
ibbem Aug 9, 2025
a8065d9
fix: fix a bug in the variation diff `endif` parsing
ibbem Aug 10, 2025
0db0fdd
refactor: directly pass the time to popIfChain in the diff parser
ibbem Aug 10, 2025
9c91b08
docs: improve some JavaDoc comments related to unparsing
ibbem Aug 10, 2025
fbd6f3c
refactor: refactor VariationUnparser.undiff
ibbem Aug 10, 2025
68d8131
refactor: rename the unparse methods
ibbem Aug 10, 2025
9de8a26
refactor: remove duplicated tree unparsing code
ibbem Aug 10, 2025
cbf504b
fix: fix `Show.baddiff`
ibbem Aug 11, 2025
c860a88
fix: always preserve the child order in `BadVDiff`
ibbem Aug 11, 2025
de10f75
refactor: store `endif`s in the label
ibbem Aug 10, 2025
bf6f5fb
fix: preserve the projection when splitting a `DiffNode`
ibbem Sep 4, 2025
f22b3cd
feat: let `DiffNode` take a `VariationLabel` on construction
ibbem Sep 4, 2025
fd60dfb
refactor: use `char` instead of `String` in `DiffSymbol`
ibbem Sep 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/datasets/eugen-bachelor-thesis.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Project name | Domain | Source code available (\*\*y\*\*es/\*\*n\*\*o)? | Is it a git repository (\*\*y\*\*es/\*\*n\*\*o)? | Repository URL | Clone URL | Estimated number of commits
-------------------|-------------------------|-------------------------------------------------|--------------------------------------------------|--------------------------------------------------------------|----------------------------------------------------|---------------------------------
berkeley-db-libdb | database system | y | y | https://github.com/berkeleydb/libdb | https://github.com/berkeleydb/libdb.git | 7
sylpheed | e-mail client | y | y | https://github.com/jan0sch/sylpheed | https://github.com/jan0sch/sylpheed.git | 2,682
vim | text editor | y | y | https://github.com/vim/vim | https://github.com/vim/vim.git | 17,109
57 changes: 57 additions & 0 deletions replication/thesis-es/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# syntax=docker/dockerfile:1

# Two-stage build: the first stage packages DiffDetective with Maven, the
# second stage is the image that actually runs the experiment.
FROM alpine:3.15
# PACKAGE STAGE

# Prepare the compile environment. JDK is automatically installed
RUN apk add maven

# Create and navigate to a working directory
WORKDIR /home/user

# Copy the Maven repository that is checked in next to the sources
COPY local-maven-repo ./local-maven-repo

# Copy the source code
COPY src ./src
# Copy the pom.xml if Maven is used
COPY pom.xml .
# Execute the maven package process
RUN mvn package || exit

# RUNTIME STAGE
FROM alpine:3.15

# Create a user
RUN adduser --disabled-password --home /home/sherlock --gecos '' sherlock

# Install bash (used by the copied shell scripts) and a Java 17 runtime
RUN apk add --no-cache --upgrade bash
RUN apk add --update openjdk17

# Change into the home directory
WORKDIR /home/sherlock

# Copy the compiled JAR file from the first stage into the second stage
# Syntax: COPY --from=STAGE_ID SOURCE_PATH TARGET_PATH
WORKDIR /home/sherlock/holmes
COPY --from=0 /home/user/target/diffdetective-*-jar-with-dependencies.jar ./DiffDetective.jar
WORKDIR /home/sherlock
# Directory into which execute.sh copies the collected results
RUN mkdir results

# Copy the setup
COPY docs holmes/docs

# Copy the docker resources
# The experiment-specific files are copied second, so they overwrite generic
# files of the same name.
COPY docker/* ./
COPY replication/thesis-es/docker/* ./
RUN mkdir DiffDetectiveMining

# Adjust permissions
RUN chown sherlock:sherlock /home/sherlock -R
RUN chmod +x execute.sh
RUN chmod +x entrypoint.sh
RUN chmod +x fix-perms.sh

# Set the entrypoint
ENTRYPOINT ["./entrypoint.sh", "./execute.sh"]

# Set the user
USER sherlock
49 changes: 49 additions & 0 deletions replication/thesis-es/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
![Maven](https://github.com/VariantSync/DiffDetective/actions/workflows/maven.yml/badge.svg)
[![Documentation](https://img.shields.io/badge/Documentation-Read-purple)][documentation]
[![Install](https://img.shields.io/badge/Install-Instructions-blue)](INSTALL.md)
[![GitHubPages](https://img.shields.io/badge/GitHub%20Pages-online-blue.svg?style=flat)][website]
[![License](https://img.shields.io/badge/License-GNU%20LGPLv3-blue)](../../LICENSE.LGPL3)

# Unparsing Experiment
This is an experiment for the bachelor thesis by Eugen Shulimov which tests the unparser for variation trees and diffs.

### Prerequisite
All following commands assume that the working directory of your terminal is the `thesis-es` directory. Please switch directories if this is not the case:
```shell
cd DiffDetective/replication/thesis-es
```

### Build the Docker container
Start the Docker daemon.
Clone this repository.
Open a terminal and navigate to the root directory of this repository.
To build the Docker container you can run the `build` script corresponding to your operating system.
#### Windows:
`.\build.bat`
#### Linux/Mac (bash):
`./build.sh`

### Start the experiment
To execute the experiment you can run the `execute` script corresponding to your operating system.

#### Windows:
`.\execute.bat`
#### Linux/Mac (bash):
`./execute.sh`

> If you want to stop the execution, you can call the provided script for stopping the container in a separate terminal.
> When restarted, the execution will continue processing by restarting at the last unfinished repository.
> #### Windows:
> `.\stop-execution.bat`
> #### Linux/Mac (bash):
> `./stop-execution.sh`

You might see warnings or errors reported from SLF4J like `Failed to load class "org.slf4j.impl.StaticLoggerBinder"` which you can safely ignore.

### View the results in the [results][resultsdir] directory
All raw results are stored in the [results][resultsdir] directory.

[documentation]: https://variantsync.github.io/DiffDetective/docs/javadoc/
[website]: https://variantsync.github.io/DiffDetective/

[resultsdir]: results
19 changes: 19 additions & 0 deletions replication/thesis-es/build.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@echo off
rem Builds the Docker image "diff-detective-unparse" for the unparse experiment.
rem Must be started from inside the thesis-es directory.
setlocal

set "targetSubPath=thesis-es"

rem Store the name of the last component of the current directory in currentDir
for %%A in ("%CD%") do set "currentDir=%%~nxA"

rem Check if the current directory ends with the target sub-path.
rem The last 9 characters are compared because "thesis-es" is 9 characters long.
rem On success the script changes to the repository root, two levels up, and
rem builds the image from there using the Dockerfile of this experiment.

if "%currentDir:~-9%"=="%targetSubPath%" (
cd ..\..
docker build -t diff-detective-unparse -f replication\thesis-es\Dockerfile .
@pause
) else (
echo error: the script must be run from inside the thesis-es directory, i.e., DiffDetective\replication\%targetSubPath%
)
endlocal

11 changes: 11 additions & 0 deletions replication/thesis-es/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# Builds the Docker image `diff-detective-unparse` for the unparse experiment.
#
# Docker can only read files below the build context, so the image has to be
# built from the project root to reach 'src', 'docker', 'local-maven-repo', etc.
# The root is located relative to this script (via BASH_SOURCE) instead of the
# caller's working directory, so the script works no matter where it is
# started from.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
cd "${script_dir}/../.." || exit

docker build --tag diff-detective-unparse --file replication/thesis-es/Dockerfile .
6 changes: 6 additions & 0 deletions replication/thesis-es/docker/DOCKER.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Docker Files

This directory contains the files that are required to run the Docker container.

## Execution
The [`execute.sh`](execute.sh) script can be adjusted to run the program that should be executed by the Docker container.
11 changes: 11 additions & 0 deletions replication/thesis-es/docker/execute.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/env bash

# Runs the unparse experiment from the directory containing DiffDetective.jar
# and afterwards copies the produced results into the sibling `results`
# directory.
cd /home/sherlock/holmes || exit

echo "Running the experiment."
# The package of the main class is `thesis_es` with an underscore: a hyphen is
# not a legal character in a Java package name, so `thesis-es.Main` can never
# be resolved by the JVM.
java -cp DiffDetective.jar org.variantsync.diffdetective.experiments.thesis_es.Main

echo "Collecting results."
cp -r results/* ../results/
echo "The results are located in the 'results' directory."

15 changes: 15 additions & 0 deletions replication/thesis-es/execute.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
@echo off
rem Runs the Docker image "diff-detective-unparse" and mounts the local results
rem directory into the container. All script arguments are forwarded to the
rem container. Must be started from inside the thesis-es directory.
setlocal

set "targetSubPath=thesis-es"

rem Store the name of the last component of the current directory in currentDir
for %%A in ("%CD%") do set "currentDir=%%~nxA"

rem Check if the current directory ends with the target sub-path.
rem The last 9 characters are compared because "thesis-es" is 9 characters long.
if "%currentDir:~-9%"=="%targetSubPath%" (
docker run --rm -v "%cd%\results":"/home/sherlock/results" diff-detective-unparse %*
) else (
echo error: the script must be run from inside the thesis-es directory, i.e., DiffDetective\replication\%targetSubPath%
)
endlocal
8 changes: 8 additions & 0 deletions replication/thesis-es/execute.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# Runs the `diff-detective-unparse` image with the local `results` directory
# mounted into the container. All arguments are forwarded to the container.

# Switch to the folder containing this script so that the relative `results`
# path is resolved independently of the caller's working directory.
script_dir="$(dirname "${BASH_SOURCE[0]}")"
cd "${script_dir}" || exit

# Announce the first argument when at least one argument was given.
if (( $# > 0 )); then
    echo "Executing $1"
fi

results_dir="$(pwd)/results"
docker run --rm --volume "${results_dir}:/home/sherlock/results" diff-detective-unparse "$@"
4 changes: 4 additions & 0 deletions replication/thesis-es/results/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
3 changes: 3 additions & 0 deletions replication/thesis-es/stop-execution.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@rem Stops every container that was created from the diff-detective-unparse image.
@echo "Stopping all running simulations. This will take a moment..."
@rem docker ps prints one container ID per line; each ID is stopped individually.
@FOR /f "tokens=*" %%i IN ('docker ps -a -q --filter "ancestor=diff-detective-unparse"') DO docker stop %%i
@echo "...done."
4 changes: 4 additions & 0 deletions replication/thesis-es/stop-execution.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/usr/bin/env bash
# Stops every container that was created from the diff-detective-unparse image.
echo "Stopping Docker container. This will take a moment..."

# `docker ps -q` prints one container ID per line.
containers="$(docker ps -a -q --filter "ancestor=diff-detective-unparse")"

# Only call `docker stop` when there is something to stop; an empty argument
# makes it fail.
if [[ -n "${containers}" ]]; then
    # Intentionally unquoted: word splitting turns the newline-separated list
    # into one argument per container ID. Quoting it would pass all IDs as a
    # single, invalid container name.
    # shellcheck disable=SC2086
    docker stop ${containers}
fi
echo "...done."
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
package org.variantsync.diffdetective.experiments.thesis_es;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.stream.Stream;
import org.variantsync.diffdetective.AnalysisRunner;
import org.variantsync.diffdetective.analysis.Analysis;
import org.variantsync.diffdetective.datasets.DatasetDescription;
import org.variantsync.diffdetective.datasets.DefaultDatasets;
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions;
import org.variantsync.diffdetective.datasets.PatchDiffParseOptions.DiffStoragePolicy;
import org.variantsync.diffdetective.datasets.Repository;
import org.variantsync.diffdetective.diff.git.DiffFilter;
import org.variantsync.diffdetective.variation.diff.parse.VariationDiffParseOptions;

/**
 * Entry point of the unparsing experiment.
 *
 * <p>{@link #main} first runs an {@code UnparseAnalysis} on the repositories listed in the
 * dataset file and afterwards condenses the per-repository CSV results into one summary file.
 */
public class Main {
    /** Markdown file listing the repositories that are analysed. */
    private static final Path DATASETS_FILE = Path.of("docs", "datasets", "eugen-bachelor-thesis.md");

    /** Directory that receives the analysis output and the summary file. */
    private static final Path RESULTS_DIR = Path.of("results", "thesis_es");

    /** File name suffix of the CSV files that are evaluated. */
    private static final String RESULT_FILE_SUFFIX = ".thesis_es.csv";

    /** Name of the summary file written into {@link #RESULTS_DIR}. */
    private static final String SUMMARY_FILE_NAME = "resultOfAnalysis.txt";

    /** Index of the first of the eight CSV columns holding the syntactic diff results. */
    private static final int DIFF_COLUMN = 8;

    /** Index of the first of the four CSV columns holding the semantic diff results. */
    private static final int DIFF_SEM_EQ_COLUMN = 16;

    /** Index of the first CSV column of the first group of eight tree results. */
    private static final int FIRST_TREE_COLUMN = 20;

    /** Index of the first CSV column of the second group of eight tree results. */
    private static final int SECOND_TREE_COLUMN = 28;

    /**
     * Labels of the four parse configurations in the order in which their results are stored:
     * index {@code i} is the plain comparison, index {@code i + 4} the comparison without
     * whitespace.
     */
    private static final String[] PARSE_CONFIGURATIONS = {
        "MultiLine0 und EmptyLine0",
        "MultiLine1 und EmptyLine0",
        "MultiLine0 und EmptyLine1",
        "MultiLine1 und EmptyLine1",
    };

    /**
     * Runs the analysis and then evaluates its results.
     *
     * @param args ignored
     * @throws IOException if the analysis or the evaluation fails to read or write a file
     */
    public static void main(String[] args) throws IOException {
        startAnalysis();
        evaluationAnalysis(DATASETS_FILE);
    }

    /**
     * Configures and starts the analysis of all repositories listed in the dataset file.
     * Merge commits are allowed and only files with the extensions {@code c} and {@code cpp}
     * are considered.
     *
     * @throws IOException if the analysis runner fails with an I/O error
     */
    private static void startAnalysis() throws IOException {
        final AnalysisRunner.Options analysisOptions = new AnalysisRunner.Options(
                Paths.get("..", "DiffDetectiveReplicationDatasets"),
                RESULTS_DIR,
                DATASETS_FILE,
                repo -> new PatchDiffParseOptions(
                        DiffStoragePolicy.REMEMBER_FULL_DIFF,
                        VariationDiffParseOptions.Default),
                repo -> new DiffFilter.Builder()
                        .allowMerge(true)
                        .allowedFileExtensions("c", "cpp")
                        .build(),
                // NOTE(review): the meaning of these two flags is defined by
                // AnalysisRunner.Options and is not visible here.
                true,
                false);

        AnalysisRunner.run(analysisOptions, extractionRunner());
    }

    /**
     * Creates the action that is executed for each repository: it runs an {@link Analysis}
     * consisting of a single {@code UnparseAnalysis} via {@link Analysis#forEachCommit}.
     *
     * @return a consumer taking the repository and the output directory for that repository
     */
    protected static BiConsumer<Repository, Path> extractionRunner() {
        return (repo, repoOutputDir) -> {
            final BiFunction<Repository, Path, Analysis> analysisFactory = (r, out) -> new Analysis(
                    "Thesis Eugen Shulimov",
                    List.of(new UnparseAnalysis()),
                    r,
                    out);

            Analysis.forEachCommit(() -> analysisFactory.apply(repo, repoOutputDir));
        };
    }

    /**
     * Processes the results of the analysis to obtain an overall summary from the individual
     * entries. The summary is written to {@code results/thesis_es/resultOfAnalysis.txt}.
     *
     * @param path path to the markdown file that lists the repositories used for the analysis
     * @throws IOException if a result file cannot be listed or read, or if the summary cannot
     *         be written
     */
    private static void evaluationAnalysis(Path path) throws IOException {
        int count = 0;
        final int[] diffTest = new int[8];
        final int[] diffSemEqTest = new int[4];
        final int[] treeTest = new int[8];

        for (DatasetDescription description : DefaultDatasets.loadDatasets(path)) {
            final Path repoResultDir = Path.of(RESULTS_DIR.toString(), description.name());

            // Files.list keeps the directory open until the stream is closed, so it has to be
            // used in a try-with-resources statement.
            final List<Path> resultFiles;
            try (Stream<Path> files = Files.list(repoResultDir)) {
                resultFiles = files
                        .filter(file -> file.getFileName().toString().endsWith(RESULT_FILE_SUFFIX))
                        .toList();
            }

            for (Path resultFile : resultFiles) {
                final String[] rows = Files.readString(resultFile).split("\n");
                // Row 0 is skipped (presumably the CSV header -- TODO confirm).
                for (int row = 1; row < rows.length; row++) {
                    if (rows[row].isBlank()) {
                        // A blank row contains no result and must not be counted.
                        continue;
                    }
                    final String[] columns = rows[row].split(";");

                    count++;
                    addColumns(diffTest, columns, DIFF_COLUMN);
                    addColumns(diffSemEqTest, columns, DIFF_SEM_EQ_COLUMN);
                    // Two groups of tree columns are added to the same totals, which is
                    // why the summary reports 2 * count checked trees.
                    addColumns(treeTest, columns, FIRST_TREE_COLUMN);
                    addColumns(treeTest, columns, SECOND_TREE_COLUMN);
                }
            }
        }

        Files.write(
                Path.of(RESULTS_DIR.toString(), SUMMARY_FILE_NAME),
                summarize(count, diffTest, diffSemEqTest, treeTest));
    }

    /**
     * Adds {@code totals.length} consecutive columns, starting at {@code firstColumn}, to the
     * corresponding entries of {@code totals}.
     *
     * @throws NumberFormatException if one of the columns contains no digit
     * @throws ArrayIndexOutOfBoundsException if the row has too few columns
     */
    private static void addColumns(int[] totals, String[] columns, int firstColumn) {
        for (int i = 0; i < totals.length; i++) {
            final String digit = parseNumberStringToIntWithLengthGreaterOne(columns[firstColumn + i]);
            totals[i] += Integer.parseInt(digit);
        }
    }

    /**
     * Builds the lines of the summary file. Each line except the separator ends with an
     * explicit line feed, so the written file contains an empty line after each entry.
     */
    private static List<String> summarize(int count, int[] diffTest, int[] diffSemEqTest, int[] treeTest) {
        final List<String> result = new ArrayList<>();

        result.add("Anzahl geprüfter Diffs : " + count + "\n");
        for (int i = 0; i < PARSE_CONFIGURATIONS.length; i++) {
            final String config = PARSE_CONFIGURATIONS[i];
            result.add("Anzahl syntaktisch korrekter Diffs mit " + config + " : " + diffTest[i] + "\n");
            result.add("Anzahl syntaktisch korrekter Diffs ohne Whitespace mit " + config + " : "
                    + diffTest[i + 4] + "\n");
            result.add("Anzahl semantisch korrekter Diffs mit " + config + " : " + diffSemEqTest[i] + "\n");
            result.add("Anzahl von Diffs mit " + config
                    + ", welche keine Korrektheitskriterium erfühlt haben : "
                    + (count - diffSemEqTest[i]) + "\n");
        }

        result.add("-------------------------------------------------------------------------------------------");

        result.add("Anzahl geprüfter Trees : " + count * 2 + "\n");
        for (int i = 0; i < PARSE_CONFIGURATIONS.length; i++) {
            final String config = PARSE_CONFIGURATIONS[i];
            result.add("Anzahl syntaktisch korrekter Trees mit " + config + " : " + treeTest[i] + "\n");
            result.add("Anzahl syntaktisch korrekter Trees ohne Whitespace mit " + config + " : "
                    + treeTest[i + 4] + "\n");
            result.add("Anzahl von Trees mit " + config
                    + ", welche keine Korrektheitskriterium erfühlt haben : "
                    + (2 * count - treeTest[i + 4]) + "\n");
        }

        return result;
    }

    /**
     * Extracts the first digit of {@code string}.
     *
     * <p>Only a single digit is returned, so multi-digit numbers are truncated to their first
     * digit (presumably every evaluated column holds a 0/1 flag -- TODO confirm).
     *
     * @param string the raw CSV cell
     * @return the first digit of the trimmed cell as a one-character string, or the empty
     *         string if the cell contains no digit
     */
    private static String parseNumberStringToIntWithLengthGreaterOne(String string) {
        string = string.trim();
        for (char c : string.toCharArray()) {
            if (Character.isDigit(c)) {
                return Character.toString(c);
            }
        }
        return "";
    }
}
Loading