==== //ccare/main/LiveJournal/htdocs/interests.bml#3 - /u0/jaffray/p4/LiveJournal/htdocs/interests.bml ====
@@ -11,6 +11,16 @@
  my $qinterest = $dbh->quote($FORM{'int'});
  my $interest;
 
+ sub print_interest {
+     my $interest = shift;
+     my $eint = &BMLUtil::eurl($interest);
+     return "<a href='/interests.bml?int=$eint'>$interest</a>";
+ }
+ sub db_error {
+    my $dbh = shift;
+    return "(=H1 Error H1=)(=P Database error: " . $dbh->errstr . " P=)";
+ }
+
  if ($FORM{'view'} eq "popular") 
  {
      my $ret = "";
@@ -23,8 +33,7 @@
      while (my ($int, $count) = $sth->fetchrow_array)
      {
 	 next if ($count == 1);
-	 my $eint = &BMLUtil::eurl($int);
-	 $ret .= "<TR><TD><A HREF=\"/interests.bml?int=$eint\">$int</A></TD><TD>$count</TD></TR>\n";
+	 $ret .= "<TR><TD>" . print_interest($int) . "</TD><TD>$count</TD></TR>\n";
      }
      $ret .= "</TABLE>\n";
      return $ret;
@@ -63,39 +72,66 @@
      return $ret;
  }
 
+ # there is room to optimize the main query of this mode.
+ # since the most popular interests take the most time to query for but are also
+ # the least significant, you can query for matches to everything except the most
+ # popular few interests, then come up with a list of candidate users for the top 100,
+ # and query on the really popular interests restricting it to those candidates.
+ # i'd estimate a potential 2-3x speedup. -- alanj 2001/05/24
+
  if ($FORM{'mode'} eq "findsim_do")
  {
+     my $rows;
      my $ret = "";
+
+     # get userid
      my $quser = $dbh->quote(lc($FORM{'user'}));
      $sth = $dbh->prepare("SELECT userid FROM user WHERE user=$quser");
      $sth->execute;
-     my ($userid) = $sth->fetchrow_array;
-     return "(=H1 Error H1=)(=P Unknown user P=)" unless ($userid);
+     my ($userid) = $sth->fetchrow_array
+       or return "(=H1 Error H1=)(=P Unknown user P=)";
 
-     $sth = $dbh->prepare("SELECT intid FROM userinterests WHERE userid=$userid");
-     $sth->execute;
-     my @intids = ();
-     push @intids, $_->{'intid'} while ($_ = $sth->fetchrow_hashref);
+     # get interests
+     $rows = $dbh->selectall_arrayref("SELECT intid FROM userinterests WHERE userid=$userid")
+       or return db_error($dbh);
+     my @intids = map {$_->[0]} @$rows
+       or return "(=H1 Error H1=)(=P User $FORM{'user'} has no defined interests. P=)";
+     my $intid_in = join(", ", @intids);
+    
+     # get number of users on the system
+     $rows = $dbh->selectall_arrayref("SELECT COUNT(*) FROM user")
+       or return db_error($dbh);
+     my $log_total_users = log($rows->[0][0])/log(2);
+     
+     # get top matches
+     my $match_rows = $dbh->selectall_hashref("SELECT u.user, ui.userid, ROUND(SUM($log_total_users - LOG(i.intcount)/LOG(2))) AS 'score' FROM user u, userinterests ui, interests i WHERE u.userid=ui.userid AND ui.intid = i.intid AND u.statusvis='V' AND ui.intid IN ($intid_in) GROUP BY 1 ORDER BY 3 DESC, 1 ASC LIMIT 100")
+       or return db_error($dbh);
+     my $userid_in = join(", ", map {$_->{userid}} @$match_rows);
 
-     return "(=H1 Error H1=)(=P User $FORM{'user'} has no defined interests. P=)" unless (@intids);
-    
-     my $intid_in = join(", ", @intids);
-     $ret .= "(=H1 Similar Users H1=)(=P The following are the most related users to $FORM{'user'}. P=)";
-     $sth = $dbh->prepare("SELECT u.user, COUNT(*) AS 'count' FROM user u, userinterests ui WHERE u.userid=ui.userid AND u.statusvis='V' AND ui.intid IN ($intid_in) GROUP BY 1 ORDER BY 2 DESC, 1 ASC LIMIT 100");
-     $sth->execute;
-     $ret .= "<P><TABLE CELLPADDING=3><TR VALIGN=BOTTOM><TD><B>#</B></TD><TD><B>User</B></TD><TD><B>Similiar<BR>Interests</B></TD><TD><B>Percent Match</B></TD></TR>\n";
-     my $total = @intids;
-     my $count=0;
-     while ($_ = $sth->fetchrow_hashref)
-     {
-	 $count++;
-	 my $percent = sprintf("%.1f", $_->{'count'}*100/$total);
-	 $ret .= "<TR>";
-	 $ret .= "<TD>$count</TD>";
-	 $ret .= "<TD>(=LJUSER $_->{'user'} LJUSER=)</TD>";
-	 $ret .= "<TD>$_->{'count'}</TD><TD>($percent%)</TD></TR>\n";
+     # get common interests for top matches
+     $rows = $dbh->selectall_hashref("SELECT ui.userid, i.interest FROM userinterests ui, interests i WHERE ui.intid = i.intid AND ui.userid IN ($userid_in) AND i.intid IN ($intid_in)")
+       or return db_error($dbh);
+     my %common_interests = ();
+     foreach my $row (@$rows) {
+         push @{$common_interests{$row->{userid}}}, $row->{interest};
      }
-     $ret .= "</TABLE>\n";
+     
+      $ret .= "(=H1 Similar Users H1=)(=P The following are the most related users to $FORM{'user'}. P=)";
+      $ret .= "<P><TABLE CELLPADDING=3><TR VALIGN=BOTTOM><TD><B>#</B></TD><TD><B>User</B></TD><TD><B>Score</B></TD><TD><B>Common Interests</B></TD></TR>\n";
+      my $count = 0;
+      foreach my $match (@$match_rows) {
+ 	 $count++;
+ 	 $ret .= "<TR>";
+ 	 $ret .= "<TD>$count</TD>";
+ 	 $ret .= "<TD>(=LJUSER $match->{'user'} LJUSER=)</TD>";
+ 	 $ret .= "<TD align=center>$match->{'score'}</TD>";
+ 	 $ret .= "<TD>";
+         $ret .= join(", ", map {print_interest($_)}
+                                sort @{$common_interests{$match->{userid}}});
+         $ret .= "</TD>";
+         $ret .= "</TR>\n";
+      }
+      $ret .= "</TABLE>\n";
 
      return $ret;
  }
