暇なのでPythonのサンプルをHaskellに移植してみました
「集合知プログラミング」のPythonのサンプルをHaskellで書いてみました。最近、協調フィルタリングのお勉強をしているのですが、コードを読んでいるだけではなかなかしっくりこないので、実際に書いてみました。
まず、Pythonのコード
映画の評価データ、Lisa Roseさんはそれぞれの映画を、2.5、3.5、3.0...で評価しているというデータ
#!/usr/bin/python
#! -*- coding: utf-8 -*-
# Movie ratings keyed by critic name; each inner dict maps a movie title to
# the score that critic gave it.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snake on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.5,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snake on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snake on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snake on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snake on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snake on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snake on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
}
で、その評価データを利用して計算するmainコード、本に掲載されているとおり
#!/usr/bin/python
#! -*- coding: utf-8 -*-
from MovieEval import critics
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two people.

    prefs maps a person's name to a dict of {item: rating}.  The score is
    1 / (1 + sum of squared rating differences) taken over the items both
    people have rated, so identical ratings give 1.0 and larger differences
    give values closer to 0.

    Returns 0 when the two people have no rated item in common.
    Raises KeyError if a looked-up name is missing from prefs.
    """
    # Collect the shared items once and reuse them for both the emptiness
    # check and the sum.
    shared = [item for item in prefs[person1] if item in prefs[person2]]
    if not shared:
        return 0

    mine, theirs = prefs[person1], prefs[person2]
    sum_of_squares = sum((mine[item] - theirs[item]) ** 2 for item in shared)

    # 1.0 forces true division: with integer ratings, Python 2 would
    # otherwise truncate the score to 0.
    return 1.0 / (1 + sum_of_squares)
def calcScore(name, prefs):
    """Print the similarity between `name` and every person in `prefs`.

    Writes a header line with `name`, one line per person holding that
    person's name and the sim_distance score against `name`, and a closing
    separator line.
    """
    print("%s:" % name)
    for nm in prefs:
        # Score against the prefs argument, not the module-level critics,
        # so the function works for whatever data set the caller passes in.
        print(" %20s %20f" % (nm, sim_distance(prefs, name, nm)))
    print("--------------------------------------------------")
if __name__ == '__main__':
    # Print one similarity table per critic in the data set.
    for critic in critics:
        calcScore(critic, critics)
これを実行すると
cuomo@karky7 ~ $ python recommendations.py
Jack Matthews:
Jack Matthews 1.000000
Mick LaSalle 0.137931
Claudia Puig 0.181818
Lisa Rose 0.200000
Toby 0.117647
Gene Seymour 0.800000
Michael Phillips 0.181818
--------------------------------------------------
Mick LaSalle:
Jack Matthews 0.137931
Mick LaSalle 1.000000
Claudia Puig 0.173913
Lisa Rose 0.307692
Toby 0.307692
Gene Seymour 0.129032
Michael Phillips 0.285714
--------------------------------------------------
Claudia Puig:
Jack Matthews 0.181818
Mick LaSalle 0.173913
Claudia Puig 1.000000
Lisa Rose 0.444444
Toby 0.235294
Gene Seymour 0.133333
Michael Phillips 0.571429
--------------------------------------------------
Lisa Rose:
Jack Matthews 0.200000
Mick LaSalle 0.307692
Claudia Puig 0.444444
Lisa Rose 1.000000
Toby 0.222222
Gene Seymour 0.142857
Michael Phillips 0.666667
--------------------------------------------------
Toby:
Jack Matthews 0.117647
Mick LaSalle 0.307692
Claudia Puig 0.235294
Lisa Rose 0.222222
Toby 1.000000
Gene Seymour 0.108108
Michael Phillips 0.285714
--------------------------------------------------
Gene Seymour:
Jack Matthews 0.800000
Mick LaSalle 0.129032
Claudia Puig 0.133333
Lisa Rose 0.142857
Toby 0.108108
Gene Seymour 1.000000
Michael Phillips 0.210526
--------------------------------------------------
Michael Phillips:
Jack Matthews 0.181818
Mick LaSalle 0.285714
Claudia Puig 0.571429
Lisa Rose 0.666667
Toby 0.285714
Gene Seymour 0.210526
Michael Phillips 1.000000
--------------------------------------------------
cuomo@karky7 ~ $
Michael PhillipsさんとMichael Phillipsさんの評価はまったく同じなので、スコアは1.0になるわけで、当たり前ですよね。1に近ければ近いほど評価が同じということ。
これをHaskellで書いてみた
基本的には同じ処理です。まず評価データ
module MovieEval (
Eval,
movName,
movEv,
getCritics
) where
import qualified Data.Map as M
-- | A single rating: one movie together with the score it was given.
data Eval = Eval
  { movName :: String -- ^ Title of the rated movie.
  , movEv :: Double -- ^ Score given to the movie.
  }
  deriving (Eq, Show)
-- | Movie ratings keyed by critic name.
--
-- Each critic maps to the list of 'Eval' records for the movies that critic
-- rated, in the order they are listed in the table below.
getCritics :: M.Map String [Eval]
getCritics = M.fromList (map toEntry ratings)
  where
    -- Turn one row of the plain table into a map entry of 'Eval' records.
    toEntry :: (String, [(String, Double)]) -> (String, [Eval])
    toEntry (critic, scores) = (critic, map (uncurry Eval) scores)

    ratings :: [(String, [(String, Double)])]
    ratings =
      [ ( "Lisa Rose"
        , [ ("Lady in the Water", 2.5)
          , ("Snake on a Plane", 3.5)
          , ("Just My Luck", 3.0)
          , ("Superman Returns", 3.5)
          , ("You, Me and Dupree", 2.5)
          , ("The Night Listener", 3.5)
          ]
        )
      , ( "Gene Seymour"
        , [ ("Lady in the Water", 3.0)
          , ("Snake on a Plane", 3.5)
          , ("Just My Luck", 1.5)
          , ("Superman Returns", 5.0)
          , ("The Night Listener", 3.0)
          , ("You, Me and Dupree", 3.5)
          ]
        )
      , ( "Michael Phillips"
        , [ ("Lady in the Water", 2.5)
          , ("Snake on a Plane", 3.0)
          , ("Superman Returns", 3.5)
          , ("The Night Listener", 4.0)
          ]
        )
      , ( "Claudia Puig"
        , [ ("Snake on a Plane", 3.5)
          , ("Just My Luck", 3.0)
          , ("The Night Listener", 4.5)
          , ("Superman Returns", 4.0)
          , ("You, Me and Dupree", 2.5)
          ]
        )
      , ( "Mick LaSalle"
        , [ ("Lady in the Water", 3.0)
          , ("Snake on a Plane", 4.0)
          , ("Just My Luck", 2.0)
          , ("Superman Returns", 3.0)
          , ("The Night Listener", 3.0)
          , ("You, Me and Dupree", 2.0)
          ]
        )
      , ( "Jack Matthews"
        , [ ("Lady in the Water", 3.0)
          , ("Snake on a Plane", 4.0)
          , ("The Night Listener", 3.0)
          , ("Superman Returns", 5.0)
          , ("You, Me and Dupree", 3.5)
          ]
        )
      , ( "Toby"
        , [ ("Snake on a Plane", 4.5)
          , ("You, Me and Dupree", 1.0)
          , ("Superman Returns", 4.0)
          ]
        )
      ]
で、評価の計算コード
import qualified Data.Map as M
import MovieEval
import Control.Monad (forM_)
import Text.Printf
import Debug.Trace
-- | Distance-based similarity between two critics.
--
-- The score is @1 / (1 + s)@, where @s@ is the sum of squared differences
-- between the two critics' scores over every pair of ratings that share a
-- movie name. Identical ratings give 1.0; larger differences give values
-- closer to 0.
--
-- Returns 0.0 when either name is missing from the map, and also when the
-- two critics have no movie in common: without any shared rating there is
-- nothing to compare, so they must not be reported as a perfect match.
sim_distance :: M.Map String [Eval] -> String -> String -> Double
-- The explicit lambda keeps the first critic's lookup outside the function
-- of the second name, so a partial application @sim_distance perfs person1@
-- can reuse it.
sim_distance perfs person1 = \person2 -> score ratings1 (M.lookup person2 perfs)
  where
    ratings1 = M.lookup person1 perfs

    score :: Maybe [Eval] -> Maybe [Eval] -> Double
    score (Just xs) (Just ys)
      | null squares = 0.0
      | otherwise = 1 / (1 + sum squares)
      where
        -- Squared score difference for every pair of ratings of the same movie.
        squares =
          [ d * d
          | x <- xs
          , y <- ys
          , movName x == movName y
          , let d = movEv x - movEv y
          ]
    score _ _ = 0.0
-- | Print a similarity table for one critic.
--
-- Writes a header line with @name@, then one row per entry of @names@ showing
-- that entry and the value of @f name entry@, then a separator line.
calcScore :: String -> [String] -> (String -> String -> Double) -> IO ()
calcScore name names f = do
  putStrLn (name ++ ":")
  mapM_ printRow names
  putStrLn "--------------------------------------------------\n"
  where
    -- Scoring function with the table's owner already applied.
    scoreAgainst :: String -> Double
    scoreAgainst = f name

    printRow :: String -> IO ()
    printRow other = printf " %20s %20f\n" other (scoreAgainst other)
-- | Print the similarity table of every critic against every critic.
main :: IO ()
main = forM_ critics $ \critic -> calcScore critic critics similarity
  where
    prefs = getCritics
    critics = M.keys prefs
    similarity = sim_distance prefs
これをHaskellで実行すると
cuomo@karky7 ~ $ runhaskell recommendations.hs
Claudia Puig:
Claudia Puig 1.0
Gene Seymour 0.13333333333333333
Jack Matthews 0.18181818181818182
Lisa Rose 0.4444444444444444
Michael Phillips 0.5714285714285714
Mick LaSalle 0.17391304347826086
Toby 0.23529411764705882
--------------------------------------------------
Gene Seymour:
Claudia Puig 0.13333333333333333
Gene Seymour 1.0
Jack Matthews 0.8
Lisa Rose 0.14285714285714285
Michael Phillips 0.21052631578947367
Mick LaSalle 0.12903225806451613
Toby 0.10810810810810811
--------------------------------------------------
Jack Matthews:
Claudia Puig 0.18181818181818182
Gene Seymour 0.8
Jack Matthews 1.0
Lisa Rose 0.2
Michael Phillips 0.18181818181818182
Mick LaSalle 0.13793103448275862
Toby 0.11764705882352941
--------------------------------------------------
Lisa Rose:
Claudia Puig 0.4444444444444444
Gene Seymour 0.14285714285714285
Jack Matthews 0.2
Lisa Rose 1.0
Michael Phillips 0.6666666666666666
Mick LaSalle 0.3076923076923077
Toby 0.2222222222222222
--------------------------------------------------
Michael Phillips:
Claudia Puig 0.5714285714285714
Gene Seymour 0.21052631578947367
Jack Matthews 0.18181818181818182
Lisa Rose 0.6666666666666666
Michael Phillips 1.0
Mick LaSalle 0.2857142857142857
Toby 0.2857142857142857
--------------------------------------------------
Mick LaSalle:
Claudia Puig 0.17391304347826086
Gene Seymour 0.12903225806451613
Jack Matthews 0.13793103448275862
Lisa Rose 0.3076923076923077
Michael Phillips 0.2857142857142857
Mick LaSalle 1.0
Toby 0.3076923076923077
--------------------------------------------------
Toby:
Claudia Puig 0.23529411764705882
Gene Seymour 0.10810810810810811
Jack Matthews 0.11764705882352941
Lisa Rose 0.2222222222222222
Michael Phillips 0.2857142857142857
Mick LaSalle 0.3076923076923077
Toby 1.0
--------------------------------------------------
cuomo@karky7 ~ $
Pythonって直感的に書いていけるところはやはり早いしいいなって思うところデスが、Haskellって関数を渡せるところが凄くいけてる感じがしますね。評価元となる人のパラメータで関数を一時的に保存しておいて、後から評価対象の人を渡すだけでグリグリ回せちゃうとこなんか感心してしまいます。チョットmapを多用し過ぎ感があったり、Applicative辺りを利用すればもっと綺麗に書けそうな気がしてならないのですが、ただいまHaskell勉強中なので、あまり突っ込まないでください。でも、こういう書き方もあるよっていうのがあったら知りたい次第でござりまする、では
0 件のコメント:
コメントを投稿