| Paste number 78901: | Collective Intelligence in Erlang Pt. I |
| Pasted by: | Shey |
| When: | 1 year, 3 months ago |
| Share: | Tweet this! | http://paste.lisp.org/+1OVP |
| Channel: | None |
| Paste contents: |
-module(pci).
-export([sim_distance/2, sim_pearson/2, sim_manhattan/2, sim_tanimoto/2, sim_chebyshev/2, get_critics/0, main/3]).
%% @doc Scores how similar two critics are using the supplied metric.
%%
%% Looks up `Person1' and `Person2' in the table returned by get_critics/0,
%% collects each critic's ratings for the titles that both have rated, and
%% passes the two equally long rating lists to `SimFunction' (a fun of
%% arity 2). Returns whatever `SimFunction' returns. dict:fetch/2 raises if
%% either name is missing from the table.
main(Person1, Person2, SimFunction) ->
    Critics = get_critics(),
    %% Fetch each critic's ratings once rather than once per common title.
    Ratings1 = dict:fetch(Person1, Critics),
    Ratings2 = dict:fetch(Person2, Critics),
    %% Titles rated by both critics; both rating lists follow this order.
    Common = intersection(dict:fetch_keys(Ratings1), dict:fetch_keys(Ratings2)),
    X = [dict:fetch(Title, Ratings1) || Title <- Common],
    Y = [dict:fetch(Title, Ratings2) || Title <- Common],
    SimFunction(X, Y).
%% @doc Euclidean-distance similarity of two equally long numeric lists.
%%
%% Returns 1.0 / (1.0 + D), where D is the Euclidean distance between the
%% two lists, so identical lists score 1.0 and the score shrinks as the
%% lists diverge. Empty lists and lists of different lengths match no
%% clause and fail with function_clause.
sim_distance([_ | _] = FeatureList1, FeatureList2)
  when length(FeatureList1) =:= length(FeatureList2) ->
    %% The guard must compare against FeatureList2 so that a length
    %% mismatch is rejected here rather than deep inside lists:zip/2.
    SumOfSquares = lists:sum([math:pow(X - Y, 2)
                              || {X, Y} <- lists:zip(FeatureList1, FeatureList2)]),
    1.0 / (1.0 + math:sqrt(SumOfSquares)).
%% @doc Pearson correlation score of two equally long numeric lists.
%%
%% Returns the covariance term divided by the square root of the product of
%% the two variance terms. Returns 0.0 when the lists are empty or when
%% that denominator is zero (at least one list has no spread), because the
%% coefficient is undefined there. Lists of different lengths fail with
%% function_clause.
sim_pearson([], []) ->
    %% No paired samples: every term below would divide by a zero length.
    0.0;
sim_pearson(X, Y) when length(X) =:= length(Y) ->
    Len = length(X),
    %% Plain sums
    SumX = lists:sum(X),
    SumY = lists:sum(Y),
    %% Sums of squares
    SumXSq = lists:sum([math:pow(N, 2) || N <- X]),
    SumYSq = lists:sum([math:pow(N, 2) || N <- Y]),
    %% Sum of pairwise products
    SumProducts = lists:sum([A * B || {A, B} <- lists:zip(X, Y)]),
    Numer = SumProducts - (SumX * SumY / Len),
    %% Floating-point rounding can leave a variance term a hair below zero
    %% for a (nearly) constant list; clamp so math:sqrt/1 cannot raise
    %% badarith on a negative argument.
    VarX = max(0.0, SumXSq - math:pow(SumX, 2) / Len),
    VarY = max(0.0, SumYSq - math:pow(SumY, 2) / Len),
    case math:sqrt(VarX * VarY) of
        %% Compare with == so both +0.0 and -0.0 are treated as zero.
        Denom when Denom == 0 -> 0.0;
        Denom -> Numer / Denom
    end.
%% @doc Manhattan (city-block) similarity of two equally long numeric lists.
%%
%% Returns 1.0 / (1.0 + D), where D is the sum of the absolute pairwise
%% differences. Two empty lists score 0.0. Lists of different lengths fail
%% with function_clause.
sim_manhattan([], []) ->
    0.0;
sim_manhattan(X, Y) when length(X) == length(Y) ->
    Distance = lists:sum(lists:zipwith(fun(A, B) -> abs(A - B) end, X, Y)),
    1.0 / (1.0 + Distance).
%% @doc Tanimoto score of two equally long numeric lists.
%%
%% An element counts as "present" when it is not equal to 0 (0.0 included).
%% With C1 and C2 the number of present elements in each list and Share the
%% number of positions present in both, the result is
%% 1.0 - Share / (C1 + C2 - Share). Note that this is one minus the
%% Tanimoto coefficient: 0.0 means the same positions are present in both
%% lists and 1.0 means no position is shared.
%%
%% When neither list has a present element (including two empty lists) the
%% ratio is undefined; 0.0 is returned, treating the two as identical,
%% instead of failing on a division by zero. Lists of different lengths
%% fail with function_clause.
sim_tanimoto(X, Y) when length(X) == length(Y) ->
    C1 = length([E || E <- X, E /= 0]),
    C2 = length([E || E <- Y, E /= 0]),
    Share = length([shared || {A, B} <- lists:zip(X, Y), A /= 0, B /= 0]),
    case C1 + C2 - Share of
        0 -> 0.0;
        Union -> 1.0 - (Share / Union)
    end.
%% @doc Chebyshev similarity of two equally long, non-empty numeric lists.
%%
%% Returns 1.0 / (1.0 + M), where M is the largest absolute pairwise
%% difference. Empty lists and lists of different lengths match no clause
%% and fail with function_clause.
sim_chebyshev([_ | _] = X, Y) when length(X) == length(Y) ->
    Gaps = lists:zipwith(fun(A, B) -> abs(A - B) end, X, Y),
    1.0 / (1.0 + lists:max(Gaps)).
%% @doc Builds the sample critic-ratings table.
%%
%% Returns a dict keyed by critic name (a string); each value is itself a
%% dict mapping a film title (a string) to that critic's numeric rating.
get_critics() ->
    Ratings = [
        {"Lisa Rose",
         [{"Lady in the Water", 2.5}, {"Snakes on a Plane", 3.5},
          {"Just My Luck", 3.0}, {"Superman Returns", 3.5},
          {"You, Me and Dupree", 2.5}, {"The Night Listener", 3.0}]},
        {"Mick LaSalle",
         [{"Lady in the Water", 3.0}, {"Snakes on a Plane", 4.0},
          {"Just My Luck", 2.0}, {"Superman Returns", 3.0},
          {"You, Me and Dupree", 2.0}, {"The Night Listener", 3.0}]},
        {"Jack Matthews",
         [{"Lady in the Water", 3.0}, {"Snakes on a Plane", 4.0},
          {"Superman Returns", 5.0}, {"You, Me and Dupree", 3.5},
          {"The Night Listener", 3.0}]},
        {"Shey",
         [{"Snakes on a Plane", 2.5}, {"Superman Returns", 3.0},
          {"You, Me and Dupree", 3.0}]}
    ],
    %% Wrap each critic's film list in its own dict, then index by critic.
    dict:from_list([{Critic, dict:from_list(Films)} || {Critic, Films} <- Ratings]).
%% Returns the elements of L2 that also occur in L1, in L2's order.
%% Duplicates in L2 are kept.
intersection(L1, L2) ->
    [Item || Item <- L2, lists:member(Item, L1)].
This paste has no annotations.