Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 20 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
<name>big-data</name>
<url>http://maven.apache.org</url>

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>16</maven.compiler.source>
<maven.compiler.target>16</maven.compiler.target>
</properties>

<dependencies>
<dependency>
Expand All @@ -26,5 +28,20 @@
<version>4.7</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.32</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.5</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>31.0.1-jre</version>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package nearsoft.academy.bigdata.recommendation;

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;

public class MovieRecommender {

private static final Logger log = LoggerFactory.getLogger(MovieRecommender.class);

public static final String ROOT_PATH = System.getProperty("user.dir");
private final String CSV_PATH = ROOT_PATH + "/src/main/resources/movies.csv";

private final Map<String, Long> users;
private final BiMap<String, Long> movies;

private Long totalReviews;
private static final Integer RECORDS = 1000000;
private static final Double THRESHOLD = 0.1;
private static final Integer RECOMMENDATIONS_NUM = 3;

public MovieRecommender(String filePath) {
users = new HashMap<>();
movies = HashBiMap.create();
totalReviews = 0L;
init(filePath);
}

public void init(String filePath){
try {
parseToCSV(readGZIPFile(filePath));
} catch (IOException e) {
log.error(e.getMessage());
}
}

public BufferedReader readGZIPFile(String filePath) throws IOException {
var fileInputStream = new FileInputStream(filePath);
var gzipInputStream = new GZIPInputStream(fileInputStream);
var inputStreamReader = new InputStreamReader(gzipInputStream);
return new BufferedReader(inputStreamReader);
}

public void parseToCSV(BufferedReader bufferedReader) throws IOException {

log.info("Parsing file to CSV");

var fileWriter = new FileWriter(CSV_PATH);

String userID = null, movieID = null, score = null;

String currentLine;
while ((currentLine = bufferedReader.readLine()) != null) {
if (currentLine.contains(":")) {

var field = currentLine.split(":", 2);
var fieldKey = field[0];
var fieldValue = field[1].trim();

switch (fieldKey) {
case "product/productId" -> {
movieID = fieldValue;
var movieNum = (long) (movies.size() + 1);
movies.putIfAbsent(movieID, movieNum);
}
case "review/userId" -> {
userID = fieldValue;
var userNum = (long) (users.size() + 1);
users.putIfAbsent(userID, userNum);
}
case "review/score" -> {
score = fieldValue;
totalReviews++;
}
}

if (Objects.nonNull(userID) && Objects.nonNull(movieID) && Objects.nonNull(score)) {
fileWriter.write(String.format("%s,%s,%s\n", users.get(userID), movies.get(movieID), score));
userID = movieID = score = null;
if (totalReviews % RECORDS == 0){
log.info(String.format("Parsed %d lines", totalReviews));
}
}

}
}
fileWriter.close();

log.info(String.format("File parsed. Total lines %d", totalReviews));
}

public long getTotalReviews() {
return totalReviews;
}

public long getTotalProducts() {
return movies.size();
}

public long getTotalUsers() {
return users.size();
}

public List<String> getRecommendationsForUser(String userID) throws IOException, TasteException {
var file = new File(CSV_PATH);
var dataModel = new FileDataModel(file);
var userSimilarity = new PearsonCorrelationSimilarity(dataModel);
var neighborhood = new ThresholdUserNeighborhood(THRESHOLD, userSimilarity, dataModel);
var recommender = new GenericUserBasedRecommender(dataModel, neighborhood, userSimilarity);
return recommender.recommend(users.get(userID), RECOMMENDATIONS_NUM)
.stream()
.map(movie -> movies.inverse().get(movie.getItemID()))
.collect(Collectors.toList());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.io.IOException;
import java.util.List;

import static nearsoft.academy.bigdata.recommendation.MovieRecommender.ROOT_PATH;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.matchers.JUnitMatchers.hasItem;
Expand All @@ -15,7 +16,7 @@ public class MovieRecommenderTest {
public void testDataInfo() throws IOException, TasteException {
//download movies.txt.gz from
// http://snap.stanford.edu/data/web-Movies.html
MovieRecommender recommender = new MovieRecommender("/path/to/movies.txt.gz");
MovieRecommender recommender = new MovieRecommender(ROOT_PATH + "/src/main/resources/movies.txt.gz");
assertEquals(7911684, recommender.getTotalReviews());
assertEquals(253059, recommender.getTotalProducts());
assertEquals(889176, recommender.getTotalUsers());
Expand Down