==== //ccare/main/LiveJournal/htdocs/interests.bml#3 - /u0/jaffray/p4/LiveJournal/htdocs/interests.bml ====
@@ -11,6 +11,16 @@
  my $qinterest = $dbh->quote($FORM{'int'});
  my $interest;
 
+ sub print_interest {
+     my $interest = shift;
+     my $eint = &BMLUtil::eurl($interest);
+     return "<a href='/interests.bml?int=$eint'>$interest</a>";
+ }
+ sub db_error {
+    my $dbh = shift;
+    return "(=H1 Error H1=)(=P Database error: " . $dbh->errstr . " P=)";
+ }
+
  if ($FORM{'view'} eq "popular") 
  {
      my $ret = "";
@@ -23,8 +33,7 @@
      while (my ($int, $count) = $sth->fetchrow_array)
      {
 	 next if ($count == 1);
-	 my $eint = &BMLUtil::eurl($int);
-	 $ret .= "<TR><TD><A HREF=\"/interests.bml?int=$eint\">$int</A></TD><TD>$count</TD></TR>\n";
+	 $ret .= "<TR><TD>" . print_interest($int) . "</TD><TD>$count</TD></TR>\n";
      }
      $ret .= "</TABLE>\n";
      return $ret;
@@ -63,6 +72,13 @@
      return $ret;
  }
 
+ # there is room to optimize the main query of this mode.
+ # since the most popular interests take the most time to query for but are also
+ # the least significant, you can query for matches to everything except the most
+ # popular few interests, then come up with a list of candidate users for the top 100,
+ # and query on the really popular interests restricting it to those candidates.
+ # i'd estimate a potential 2-3x speedup. -- alanj 2001/05/24
+
  if ($FORM{'mode'} eq "findsim_do")
  {
      my $ret = "";
@@ -79,13 +95,18 @@
 
      return "(=H1 Error H1=)(=P User $FORM{'user'} has no defined interests. P=)" unless (@intids);
     
+     my $rows = $dbh->selectall_arrayref("SELECT COUNT(*) FROM user")
+       or return db_error($dbh);
+     my $log_total_users = log($rows->[0][0])/log(2);
+     
      my $intid_in = join(", ", @intids);
      $ret .= "(=H1 Similar Users H1=)(=P The following are the most related users to $FORM{'user'}. P=)";
-     $sth = $dbh->prepare("SELECT u.user, COUNT(*) AS 'count' FROM user u, userinterests ui WHERE u.userid=ui.userid AND u.statusvis='V' AND ui.intid IN ($intid_in) GROUP BY 1 ORDER BY 2 DESC, 1 ASC LIMIT 100");
+     $sth = $dbh->prepare("SELECT u.user, ui.userid, ROUND(SUM($log_total_users - LOG(i.intcount)/LOG(2))) AS 'score' FROM user u, userinterests ui, interests i WHERE u.userid=ui.userid AND ui.intid = i.intid AND u.statusvis='V' AND ui.intid IN ($intid_in) GROUP BY 1 ORDER BY 3 DESC, 1 ASC LIMIT 100");
      $sth->execute;
-     $ret .= "<P><TABLE CELLPADDING=3><TR VALIGN=BOTTOM><TD><B>#</B></TD><TD><B>User</B></TD><TD><B>Similiar<BR>Interests</B></TD><TD><B>Percent Match</B></TD></TR>\n";
+     $sth2 = $dbh->prepare("SELECT i.interest FROM userinterests ui, interests i WHERE ui.intid = i.intid AND ui.intid IN ($intid_in) AND ui.userid = ? ORDER BY interest");
+     $ret .= "<P><TABLE CELLPADDING=3><TR VALIGN=BOTTOM><TD><B>#</B></TD><TD><B>User</B></TD><TD><B>Score</B></TD><TD><B>Common Interests</B></TD></TR>\n";
      my $total = @intids;
-     my $count=0;
+     my $count = 0;
      while ($_ = $sth->fetchrow_hashref)
      {
 	 $count++;
@@ -93,7 +114,11 @@
 	 $ret .= "<TR>";
 	 $ret .= "<TD>$count</TD>";
 	 $ret .= "<TD>(=LJUSER $_->{'user'} LJUSER=)</TD>";
-	 $ret .= "<TD>$_->{'count'}</TD><TD>($percent%)</TD></TR>\n";
+	 $ret .= "<TD align=center>$_->{'score'}</TD>";
+         $sth2->execute($_->{'userid'});
+         my $rows = $sth2->fetchall_arrayref;
+	 $ret .= "<TD>" . join(", ", map {print_interest($_->[0])} @$rows) . "</TD>";
+         $ret .= "</TR>\n";
      }
      $ret .= "</TABLE>\n";
 
