MovieLens (Julia) Laboratorio de implementación #97

vicgq · 2024-11-08T23:51:23Z

Solución al ejercicio de 'Movielens' utilizando Julia.

`using CSV
using DataFrames
using Base.Threads
using BenchmarkTools
using Printf

# Procesa registros en chunks, combina géneros y guarda en archivos de salida

"""
    process_records(records, output_index)

Write one chunk of ratings to `./movielens/ratings_<output_index>.csv`.

# Arguments
- `records`: the table to write; it is handed to `CSV.write` unchanged.
- `output_index`: chunk number used to build the output file name.

# Returns
The value returned by `CSV.write`.
"""
function process_records(records, output_index)
    # `lpad` with a width of 1 adds no padding to a non-empty index; it is kept so the
    # file name is built exactly as before.
    output_filename = "./movielens/ratings_$(lpad(output_index, 1, '0')).csv"

    # Make sure the target directory exists before writing; `mkpath` does nothing when
    # the directory is already there.
    mkpath(dirname(output_filename))

    return CSV.write(output_filename, records)
end

# Función principal que divide el archivo de ratings en chunks y los procesa en paralelo

"""
    Split_Ratings(total_jobs = 10)

Read `ratings.csv` and split it into `total_jobs` contiguous chunks, writing each chunk
with `process_records` from its own `@threads` iteration.

Chunk `i` covers rows `div((i - 1) * n, total_jobs) + 1` through `div(i * n, total_jobs)`,
where `n` is the number of rows read. Together the chunks cover every row exactly once,
including the remainder when `n` is not a multiple of `total_jobs`. A chunk is empty when
there are fewer rows than jobs.

# Arguments
- `total_jobs`: number of chunks (and output files) to produce.

# Throws
- `ArgumentError` if `total_jobs` is smaller than 1.
"""
function Split_Ratings(total_jobs = 10)
    total_jobs >= 1 ||
        throw(ArgumentError("total_jobs must be at least 1, got $total_jobs"))

    ratings_df = CSV.read("ratings.csv", DataFrame)
    total_rows = nrow(ratings_df)

    @threads for i in 1:total_jobs
        println("   El Worker ", i, " está fraccionando Ratings.")
        # Balanced partition: the boundaries are computed from the total row count, so
        # no trailing rows are dropped when the division is not exact.
        start_idx = div((i - 1) * total_rows, total_jobs) + 1
        end_idx = div(i * total_rows, total_jobs)
        records_chunk = ratings_df[start_idx:end_idx, :]
        process_records(records_chunk, i)
    end

    return nothing
end

"""
    FindRatingsMaster(nF = 10)

Aggregate rating counts and rating sums per genre over the `nF` chunk files
`./movielens/ratings_<i>.csv`, then print a count/average table to standard output.

Each chunk is read and handed to `FindRatingsWorker` inside a `@threads` loop. Every
iteration writes only to its own column of the partial-result matrices, so the loop needs
no locking.

# Arguments
- `nF`: number of rating chunk files to process.

# Returns
`nothing`; the results are only printed.
"""
function FindRatingsMaster(nF = 10)
    # Known genres; a rating counts towards a genre when the movie's genre string
    # contains the genre name.
    kg = ["Action", "Adventure", "Animation", "Children", "Comedy", "Crime", "Documentary",
          "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX", "Musical", "Mystery", "Romance",
          "Sci-Fi", "Thriller", "War", "Western", "(no genres listed)"]

    ng = length(kg)
    ra = zeros(ng, nF)  # ra[g, f]: sum of the ratings for genre g found in chunk f
    ca = zeros(ng, nF)  # ca[g, f]: number of ratings for genre g found in chunk f

    # Only the join key and the genre string are needed from the movies table.
    dfm = CSV.read("movies.csv", DataFrame)
    dfm = dfm[:, [:movieId, :genres]]

    @threads for i in 1:nF
        # The chunk is local to the iteration so it can be freed once it is processed.
        dfr = CSV.read("./movielens/ratings_$(lpad(i, 1, '0')).csv", DataFrame)
        ra[:, i], ca[:, i] = FindRatingsWorker(i, ng, kg, dfm, dfr)
    end

    # Collapse the per-chunk partial results into one total per genre.
    sra = vec(sum(ra; dims=2))
    sca = vec(sum(ca; dims=2))

    println("\n\n============================> RESULTADOS <============================")
    for i in 1:ng
        # A genre with no ratings prints NaN as its average (0.0 / 0.0).
        @printf("count = %14.2f   average = %14.2f   genre = %s\n", sca[i], sra[i]/sca[i], kg[i])
    end
    println("======================================================================\n")

    return nothing
end #FindRatingsMaster()

"""
    FindRatingsWorker(w, ng, kg, dfm, dfr)

Join the movies table `dfm` with one chunk of ratings `dfr` on `:movieId` and accumulate,
for each of the first `ng` genres in `kg`, the sum and the number of ratings whose genre
string contains that genre name.

# Arguments
- `w`: worker number, used only in the progress message.
- `ng`: number of genres to accumulate.
- `kg`: genre names.
- `dfm`: movies table; the genre string is read from column 2 of the join.
- `dfr`: ratings chunk; the rating value is read from column 4 of the join.

# Returns
A tuple `(ra, ca)` of length-`ng` `Float64` vectors: rating sums and rating counts.
"""
function FindRatingsWorker(w::Integer, ng::Integer, kg::Array, dfm::DataFrame, dfr::DataFrame)
    println(" El Worker ", w, " está procesando Ratings con Movielens.")

    ra = zeros(ng)  # sum of the ratings for each genre
    ca = zeros(ng)  # number of ratings for each genre

    ij = innerjoin(dfm, dfr, on = :movieId)

    # The columns are taken by position, as before: 2 is the genre string and 4 is the
    # rating. NOTE(review): this assumes `dfm` is (movieId, genres) and `dfr` is
    # (userId, movieId, rating, ...) — confirm against the input files.
    # Extracting the columns once and passing them to a helper lets the hot loop work on
    # concretely typed vectors instead of building a DataFrameRow per iteration.
    _accumulate_genre_ratings!(ra, ca, kg, ij[!, 2], ij[!, 4])

    return ra, ca
end

# Add each rating to the sum and count of every genre named in its genre string.
function _accumulate_genre_ratings!(ra, ca, kg, genres, ratings)
    for j in eachindex(genres, ratings)
        genre_string = genres[j]
        rating = ratings[j]
        for i in eachindex(ra)
            if occursin(kg[i], genre_string)
                ca[i] += 1
                ra[i] += rating
            end
        end
    end
    return ra, ca
end

"""
    main(execute_split::Bool)

Run the MovieLens pipeline and time each stage with `@time`.

# Arguments
- `execute_split`: when `true`, first split `ratings.csv` into chunk files with
  `Split_Ratings`. When `false`, that stage is skipped, so the chunk files must already
  exist for the aggregation stage to read.

# Returns
`nothing`.
"""
function main(execute_split::Bool)
    # The flag used to be ignored; it now controls whether the split stage runs.
    if execute_split
        println("\nIniciando la función de partición:")
        @time Split_Ratings()
    end

    println("\nIniciando unión de Ratings con Movies mediante hilos:")
    @time FindRatingsMaster()

    return nothing
end

# Main Function

# Script entry point: run the whole pipeline with the split stage enabled and time it.
# The first call in a session also includes compilation time in the reported figure.
@time main(true)`

Solución al ejercicio de 'Movielens' utilizando goroutines.

vicgq added 2 commits November 1, 2024 20:56

MovieLens (Go) Laboratorio de implementación

3ab79b3

Solución al ejercicio de 'Movielens' utilizando goroutines.

Merge branch 'camachojua:main' into main

61c0ecb

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

MovieLens (Julia) Laboratorio de implementación #97

MovieLens (Julia) Laboratorio de implementación #97

vicgq commented Nov 8, 2024

MovieLens (Julia) Laboratorio de implementación #97

Are you sure you want to change the base?

MovieLens (Julia) Laboratorio de implementación #97

Conversation

vicgq commented Nov 8, 2024

Procesa registros en chunks, combina géneros y guarda en archivos de salida

Función principal que divide el archivo de ratings en chunks y los procesa en paralelo

Main Function