<?php
/**
 * Paragraph- and word-level comparison of two text files.
 *
 * The script splits both files into paragraphs, pairs up similar paragraphs,
 * marks word-level insertions/deletions inside each pair, and finally prints
 * the paragraphs in order with unchanged / moved / removed / added handling.
 *
 * Inputs: the variables $src_file and $trg_file must be defined by the
 * including scope before this file runs.
 * NOTE(review): the original relied on register_globals to populate these
 * from the request; if they are ever fed from user input again, the paths
 * must be validated first, because their contents are echoed verbatim.
 *
 * NOTE(review): every marker string ($base_plus, $base_table_plus, ...) is an
 * empty string in this file; presumably they were meant to carry markup --
 * confirm before relying on the output format.
 */

/**
 * Read a file and group its lines into paragraphs separated by blank lines.
 *
 * Carriage returns and newlines are removed, tabs become spaces, and each
 * line is trimmed. The non-blank lines of a paragraph are concatenated, each
 * followed by a single space.
 *
 * @param string $fname Path of the file to read.
 * @return array Paragraph strings under keys 0..n-1 plus "count" => n.
 *               A missing or unreadable file yields array("count" => 0).
 */
function getparas($fname)
{
    $paras = array();
    $count = 0;

    $data = false;
    if (is_string($fname) && $fname !== "" && is_file($fname) && is_readable($fname)) {
        $data = file($fname);
    }
    if ($data === false) {
        $paras["count"] = 0;
        return $paras;
    }

    $current = "";
    // foreach instead of an empty() test: empty("0") is true, which used to
    // end the scan early on an unterminated final line containing just "0".
    foreach ($data as $line) {
        $line = str_replace("\r", "", $line);
        $line = str_replace("\n", "", $line);
        $line = str_replace("\t", " ", $line);
        $line = trim($line);

        if ($line === "") {
            // A blank line closes the paragraph being collected, if any.
            if (trim($current) !== "") {
                $paras[$count] = $current;
                $count++;
                $current = "";
            }
        } else {
            $current = $current . $line . " ";
        }
    }

    // Flush the last paragraph when the file does not end with a blank line.
    if ($current !== "") {
        $paras[$count] = $current;
        $count++;
    }

    $paras["count"] = $count;
    return $paras;
}

/**
 * Split text on single spaces and keep the words of at least $thresh bytes.
 *
 * Empty parts (produced by consecutive or trailing spaces) are skipped. The
 * previous implementation stopped at the first empty part or at the word
 * "0", silently discarding the rest of the text.
 *
 * @param string $data   Text to split.
 * @param int    $thresh Minimum length (strlen, i.e. bytes) for a word to be kept.
 * @return array Kept words under keys 0..n-1 plus "count" => n.
 */
function getwords($data, $thresh)
{
    $words = array();
    $count = 0;

    foreach (explode(" ", (string) $data) as $part) {
        if ($part === "") {
            continue;
        }
        if (strlen($part) >= $thresh) {
            $words[$count] = trim($part);
            $count++;
        }
    }

    $words["count"] = $count;
    return $words;
}

/**
 * Longest common subsequence of two token lists.
 *
 * Both inputs use the layout produced by getwords(): tokens under keys
 * 0..n-1 and the token count under "count". Tokens are compared with ===
 * so that numeric-looking strings such as "1.0" and "1" stay distinct.
 *
 * @param array $words1 First token list.
 * @param array $words2 Second token list.
 * @param bool  $flag   When true, also build the edit script.
 * @return array "total" => length of the common subsequence,
 *               "same"  => false only when $flag is true and the lists differ,
 *               "out"   => (only when $flag is true) the edit script in
 *                          REVERSE order: entries under keys 0..k-1 plus
 *                          "count" => k, each entry being a one-character
 *                          marker followed by the token: "0" common,
 *                          "+" only in $words2, "-" only in $words1.
 */
function lcs($words1, $words2, $flag)
{
    $a = $words1["count"];
    $b = $words2["count"];
    $c = min($a, $b);
    $max = max($a, $b);

    // Length of the common prefix; the bound is checked before the tokens
    // are read so that no index past the end is touched.
    $hh = 0;
    while ($hh < $c && $words1[$hh] === $words2[$hh]) {
        $hh++;
    }

    $total = 0;
    $lc = array();
    if ($hh < $c) {
        // Row-by-row dynamic programme over the region after the prefix.
        // Cells outside the region count as 0.
        $prev = array();
        for ($ii = $hh; $ii < $a; $ii++) {
            $cur = array();
            for ($jj = $hh; $jj < $b; $jj++) {
                $diag = isset($prev[$jj - 1]) ? $prev[$jj - 1] : 0;
                $up = isset($prev[$jj]) ? $prev[$jj] : 0;
                $left = isset($cur[$jj - 1]) ? $cur[$jj - 1] : 0;

                if ($words1[$ii] === $words2[$jj]) {
                    $cur[$jj] = $diag + 1;
                } elseif ($up >= $left) {
                    $cur[$jj] = $up;
                } else {
                    $cur[$jj] = $left;
                }
            }
            // The full table is only needed for the edit script.
            if ($flag) {
                $lc[$ii] = $cur;
            }
            $prev = $cur;
        }
        $total = $prev[$b - 1];
    }
    $total = $total + $hh;

    $result = array();
    $same = true;
    if ($flag) {
        $same = ($total == $max);

        $out = array();
        $kk = 0;
        $ii = $a - 1;
        $jj = $b - 1;

        // Walk back from the bottom-right cell to the end of the prefix.
        while ($ii >= $hh || $jj >= $hh) {
            if ($ii < $hh) {
                // $words1 is exhausted: the rest of $words2 was inserted.
                $out[$kk] = "+" . $words2[$jj];
                $kk++;
                $jj--;
            } elseif ($jj < $hh) {
                // $words2 is exhausted: the rest of $words1 was deleted.
                $out[$kk] = "-" . $words1[$ii];
                $kk++;
                $ii--;
            } elseif ($ii == $hh && $jj == $hh) {
                // First cell after the prefix: the prefix scan stopped here,
                // so these two tokens are known to differ.
                $out[$kk] = "+" . $words2[$jj];
                $kk++;
                $out[$kk] = "-" . $words1[$ii];
                $kk++;
                $ii--;
                $jj--;
            } else {
                $here = $lc[$ii][$jj];
                // -1 stands for "outside the table" and never equals a cell.
                $left = ($jj > $hh) ? $lc[$ii][$jj - 1] : -1;
                $up = ($ii > $hh) ? $lc[$ii - 1][$jj] : -1;

                if ($here == $left) {
                    $out[$kk] = "+" . $words2[$jj];
                    $kk++;
                    $jj--;
                } elseif ($here == $up) {
                    $out[$kk] = "-" . $words1[$ii];
                    $kk++;
                    $ii--;
                } else {
                    $out[$kk] = "0" . $words1[$ii];
                    $kk++;
                    $ii--;
                    $jj--;
                }
            }
        }

        // The common prefix is unchanged by construction.
        for ($ii = $hh - 1; $ii >= 0; $ii--) {
            $out[$kk] = "0" . $words1[$ii];
            $kk++;
        }

        $out["count"] = $kk;
        $result["out"] = $out;
    }

    $result["total"] = $total;
    $result["same"] = $same;
    return $result;
}

$start = time();

$args = array();
$args[0] = isset($src_file) ? $src_file : "";
$args[1] = isset($trg_file) ? $trg_file : "";

$paras1 = getparas($args[0]);
$paras2 = getparas($args[1]);

$base_plus = "";
$base_minus = "";
$base_table_plus = "";
$base_table_minus_p = "";
$base_table_minus_x = "";
$base_table_else = "";

$a_plus = $base_plus;
$a_minus = $base_minus;
$a_table_plus = $base_table_plus;
$a_table_minus_p = $base_table_minus_p;
$a_table_minus_x = $base_table_minus_x;
$a_table_else = $base_table_else;
$a_plus_font = "";
$a_minus_font = "";

// Keep the file with more paragraphs in $paras1; when the roles are swapped
// the plus/minus markers are swapped with them.
if ($paras1["count"] < $paras2["count"]) {
    $paras3 = $paras1;
    $paras1 = $paras2;
    $paras2 = $paras3;

    $a_plus = $base_minus;
    $a_minus = $base_plus;
    $a_table_plus = $base_table_minus_p;
    $a_table_minus_p = $base_table_plus;
    $a_table_minus_x = $base_table_else;
    $a_table_else = $base_table_minus_x;
    $a_plus_font = "";
    $a_minus_font = "";
}

// One token per paragraph. "1p<i>" / "2p<j>" identify a paragraph of the
// first / second list; the matching loop below rewrites them.
$ptok1 = array();
for ($ii = 0; $ii < $paras1["count"]; $ii++) {
    $ptok1[$ii] = "1p" . $ii;
}
$ptok1["count"] = $paras1["count"];

$ptok2 = array();
for ($ii = 0; $ii < $paras2["count"]; $ii++) {
    $ptok2[$ii] = "2p" . $ii;
}
$ptok2["count"] = $paras2["count"];

//
// Tuning Parameters
$thresh = 11;
$cutoff = .33;
$ymin = 8;
$tmin = 4;
//
//

$cutoff2 = $cutoff / 2;
$targ = -1;

for ($ii = 0; $ii < $paras1["count"]; $ii++) {
    $test = $thresh;
    $nfound = true;

    $words0 = getwords($paras1[$ii], 1);
    $wrds = $words0["count"];
    if ($wrds <= $ymin) {
        $test = 2;
    }

    $words1 = getwords($paras1[$ii], $test);
    $y1 = $words1["count"];
    if ($wrds > $ymin) {
        // Lower the length threshold until enough words survive the filter.
        while ($test > $tmin and $y1 < $ymin) {
            $test--;
            $words1 = getwords($paras1[$ii], $test);
            $y1 = $words1["count"];
        }
    }

    if ($y1 > 0) {
        $d = 0;
        $continue = true;
        $targ++;

        // Probe candidates at $targ, $targ-1, $targ+1, $targ-2, ... until
        // both ends of the probe have left the paragraph list.
        while ($continue) {
            $jj = $targ + $d;
            if ($d >= 0) {
                $d = -$d - 1;
            } else {
                $d = -$d;
            }
            $jjx = $targ + $d;
            if ($d >= 0) {
                $jj0 = $jj;
                $jj1 = $jjx;
            } else {
                $jj0 = $jjx;
                $jj1 = $jj;
            }

            if ($jj0 < 0 and $jj1 > $paras2["count"]) {
                $continue = false;
            } elseif ($jj >= 0 and $jj < $paras2["count"] and substr($ptok2[$jj], 0, 1) === "2") {
                // Cheap pre-filter on the long words only.
                $words2 = getwords($paras2[$jj], $test);
                $lcs0 = lcs($words1, $words2, false);
                $x = $lcs0["total"];
                $y2 = $words2["count"];

                if ($y2 > 0) {
                    $z = $x / ($y1 + $y2);
                    if ($z > $cutoff2) {
                        // Full comparison on every word.
                        $words2 = getwords($paras2[$jj], 1);
                        $lcs = lcs($words0, $words2, true);
                        $x = $lcs["total"];
                        $y2 = $words2["count"];
                        $z = $x / ($words0["count"] + $y2);

                        if ($z > $cutoff2) {
                            if ($lcs["same"] == false) {
                                // Rebuild the paragraph from the edit script,
                                // which is stored in reverse order.
                                $paras1[$ii] = "";
                                $out = $lcs["out"];
                                $kmax = $out["count"];
                                for ($aa = $kmax - 1; $aa >= 0; $aa--) {
                                    $temp = substr($out[$aa], 1) . " ";
                                    $marker = substr($out[$aa], 0, 1);
                                    if ($marker === "+") {
                                        $temp = $a_plus . $temp . "\n" . $a_plus_font;
                                    }
                                    if ($marker === "-") {
                                        $temp = $a_minus . $temp . "" . $a_minus_font;
                                    }
                                    $paras1[$ii] = $paras1[$ii] . $temp;
                                }
                            }
                            // Give the matched paragraph the same token on both sides.
                            $ptok2[$jj] = "1p" . $ii;
                            $targ = $jj;
                            $continue = false;
                            $nfound = false;
                        }
                    }
                }
            }
        }

        if ($nfound) {
            $ptok1[$ii] = "1x" . $ii;
        }
    } else {
        $ptok1[$ii] = "1xz";
    }
}

$lcs = lcs($ptok1, $ptok2, true);
$out = $lcs["out"];
$kmax = $out["count"];

for ($aa = $kmax - 1; $aa >= 0; $aa--) {
    // Entry layout: <marker><source list><p|x><paragraph index or "z">
    $type = substr($out[$aa], 0, 1);
    $src = substr($out[$aa], 1, 1);
    $nfound = substr($out[$aa], 2, 1);
    $ii = substr($out[$aa], 3);

    if ($src === "1") {
        if ($type === "0") {
            echo $paras1[$ii] . "\n\n";
        }
        //moved from another location
        if ($type === "+") {
            echo "" . $a_table_plus . " \n" . $paras1[$ii] . "\n";
        }
        //moved to another location
        if ($type === "-" and $nfound === "p") {
            echo "" . $a_table_minus_p . " \n" . $paras1[$ii] . "\n";
        }
        //removed
        if ($type === "-" and $nfound === "x" and $ii !== "z") {
            echo "" . $a_table_minus_x . " \n" . $paras1[$ii] . "\n";
        }
    } else {
        echo "" . $a_table_else . " \n" . $paras2[$ii] . "\n\n";
    }
}

echo "\n\n\ntook: " . ((time() - $start) / 60) . " minutes";