diff --git a/pom.xml b/pom.xml
index 8169ff7..3eb4052 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,9 +10,11 @@
big-data
http://maven.apache.org
-
- UTF-8
-
+
+ UTF-8
+ 16
+ 16
+
@@ -26,5 +28,20 @@
4.7
test
+
+ org.slf4j
+ slf4j-api
+ 1.7.32
+
+
+ org.slf4j
+ slf4j-simple
+ 1.7.5
+
+
+ com.google.guava
+ guava
+ 31.0.1-jre
+
diff --git a/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
new file mode 100644
index 0000000..1f194e3
--- /dev/null
+++ b/src/main/java/nearsoft/academy/bigdata/recommendation/MovieRecommender.java
@@ -0,0 +1,135 @@
+package nearsoft.academy.bigdata.recommendation;
+
+import com.google.common.collect.BiMap;
+import com.google.common.collect.HashBiMap;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
+import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
+import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPInputStream;
+
+public class MovieRecommender {
+
+ private static final Logger log = LoggerFactory.getLogger(MovieRecommender.class);
+
+ public static final String ROOT_PATH = System.getProperty("user.dir");
+ private final String CSV_PATH = ROOT_PATH + "/src/main/resources/movies.csv";
+
+ private final Map users;
+ private final BiMap movies;
+
+ private Long totalReviews;
+ private static final Integer RECORDS = 1000000;
+ private static final Double THRESHOLD = 0.1;
+ private static final Integer RECOMMENDATIONS_NUM = 3;
+
+ public MovieRecommender(String filePath) {
+ users = new HashMap<>();
+ movies = HashBiMap.create();
+ totalReviews = 0L;
+ init(filePath);
+ }
+
+ public void init(String filePath){
+ try {
+ parseToCSV(readGZIPFile(filePath));
+ } catch (IOException e) {
+ log.error(e.getMessage());
+ }
+ }
+
+ public BufferedReader readGZIPFile(String filePath) throws IOException {
+ var fileInputStream = new FileInputStream(filePath);
+ var gzipInputStream = new GZIPInputStream(fileInputStream);
+ var inputStreamReader = new InputStreamReader(gzipInputStream);
+ return new BufferedReader(inputStreamReader);
+ }
+
+ public void parseToCSV(BufferedReader bufferedReader) throws IOException {
+
+ log.info("Parsing file to CSV");
+
+ var fileWriter = new FileWriter(CSV_PATH);
+
+ String userID = null, movieID = null, score = null;
+
+ String currentLine;
+ while ((currentLine = bufferedReader.readLine()) != null) {
+ if (currentLine.contains(":")) {
+
+ var field = currentLine.split(":", 2);
+ var fieldKey = field[0];
+ var fieldValue = field[1].trim();
+
+ switch (fieldKey) {
+ case "product/productId" -> {
+ movieID = fieldValue;
+ var movieNum = (long) (movies.size() + 1);
+ movies.putIfAbsent(movieID, movieNum);
+ }
+ case "review/userId" -> {
+ userID = fieldValue;
+ var userNum = (long) (users.size() + 1);
+ users.putIfAbsent(userID, userNum);
+ }
+ case "review/score" -> {
+ score = fieldValue;
+ totalReviews++;
+ }
+ }
+
+ if (Objects.nonNull(userID) && Objects.nonNull(movieID) && Objects.nonNull(score)) {
+ fileWriter.write(String.format("%s,%s,%s\n", users.get(userID), movies.get(movieID), score));
+ userID = movieID = score = null;
+ if (totalReviews % RECORDS == 0){
+ log.info(String.format("Parsed %d lines", totalReviews));
+ }
+ }
+
+ }
+ }
+ fileWriter.close();
+
+ log.info(String.format("File parsed. Total lines %d", totalReviews));
+ }
+
+ public long getTotalReviews() {
+ return totalReviews;
+ }
+
+ public long getTotalProducts() {
+ return movies.size();
+ }
+
+ public long getTotalUsers() {
+ return users.size();
+ }
+
+ public List getRecommendationsForUser(String userID) throws IOException, TasteException {
+ var file = new File(CSV_PATH);
+ var dataModel = new FileDataModel(file);
+ var userSimilarity = new PearsonCorrelationSimilarity(dataModel);
+ var neighborhood = new ThresholdUserNeighborhood(THRESHOLD, userSimilarity, dataModel);
+ var recommender = new GenericUserBasedRecommender(dataModel, neighborhood, userSimilarity);
+ return recommender.recommend(users.get(userID), RECOMMENDATIONS_NUM)
+ .stream()
+ .map(movie -> movies.inverse().get(movie.getItemID()))
+ .collect(Collectors.toList());
+ }
+
+}
diff --git a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
index 0d0b1fe..2cdbb48 100644
--- a/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
+++ b/src/test/java/nearsoft/academy/bigdata/recommendation/MovieRecommenderTest.java
@@ -6,6 +6,7 @@
import java.io.IOException;
import java.util.List;
+import static nearsoft.academy.bigdata.recommendation.MovieRecommender.ROOT_PATH;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.hasItem;
@@ -15,7 +16,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
- MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
+ MovieRecommender recommender = new MovieRecommender(ROOT_PATH + "/src/main/resources/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());