Merge branch 'submit-similarity'
This commit is contained in:
commit
1925547cf4
7 changed files with 632 additions and 28 deletions
228
app/lib/Similarity.inc
Normal file
228
app/lib/Similarity.inc
Normal file
|
@ -0,0 +1,228 @@
|
|||
<?php
|
||||
|
||||
/**
|
||||
* The Legend of Z
|
||||
*
|
||||
* @author Oliver Hanraths <oliver.hanraths@uni-duesseldorf.de>
|
||||
* @copyright 2014 Heinrich-Heine-Universität Düsseldorf
|
||||
* @license http://www.gnu.org/licenses/gpl.html
|
||||
* @link https://bitbucket.org/coderkun/the-legend-of-z
|
||||
*/
|
||||
|
||||
namespace hhu\z\lib;
|
||||
|
||||
|
||||
/**
 * Class to calculate similarity between documents.
 *
 * Documents are represented by the frequencies of their terms
 * (character N-grams) and compared by the cosine similarity of their
 * TF*IDF vectors.
 *
 * @author	Oliver Hanraths <oliver.hanraths@uni-duesseldorf.de>
 */
class Similarity
{

	/**
	 * Read a file and return its text.
	 *
	 * Currently only PDF-files are supported and “pdftotext” needs to be
	 * installed. If reading fails, false is returned.
	 *
	 * @param	string	$filename	Name of file to read
	 * @return	mixed			Lowercased text of document (string) or false (boolean)
	 */
	public static function readDocument($filename)
	{
		if(!file_exists($filename)) {
			return false;
		}

		$text = array();
		$result = 0;
		// The filename is passed through a shell: escape it so that quotes
		// or other metacharacters in it cannot break out of the argument
		exec(sprintf('pdftotext %s -', escapeshellarg($filename)), $text, $result);
		if($result != 0) {
			return false;
		}

		// NOTE(review): exec() returns one array entry per output line and
		// the lines are joined without a separator, so words at line
		// boundaries run together. Kept unchanged to stay comparable with
		// documents that have already been indexed.
		return mb_strtolower(implode('', $text));
	}


	/**
	 * Split a text into N-grams.
	 *
	 * The text is padded with N-1 blanks on both sides, so that every
	 * character of the text is part of exactly N N-grams. An empty text
	 * results in an empty list. The default N is 3.
	 *
	 * @throws	\InvalidArgumentException	If N is smaller than 1
	 * @param	string	$document	Text to be split
	 * @param	int	$n		Size of grams to split into (N)
	 * @return	array			List of n-grams
	 */
	public static function splitNgrams($document, $n=3)
	{
		$n = (int) $n;
		if($n < 1) {
			throw new \InvalidArgumentException('N-gram size must be at least 1');
		}

		// Without text there are no terms; do not emit N-grams that consist
		// of padding only
		$document = (string) $document;
		if($document === '') {
			return array();
		}

		// Pad with exactly N-1 blanks on each side
		$affix = str_repeat(' ', $n-1);
		$document = $affix.$document.$affix;

		// The last N-gram starts at position (length - N), inclusive
		$ngrams = array();
		$last = mb_strlen($document) - $n;
		for($i=0; $i<=$last; $i++) {
			$ngrams[] = mb_substr($document, $i, $n);
		}


		return $ngrams;
	}


	/**
	 * Compare two documents, represented by their Term Frequencies (TFs)
	 * values.
	 *
	 * $tfsA, $tfsB and $idf_n are expected to be associative arrays with
	 * the term as key and the corresponding frequency as value.
	 *
	 * @param	array	$tfsA	Term Frequencies of document A
	 * @param	array	$tfsB	Term Frequencies of document B
	 * @param	int	$idf_N	Total count of documents in corpus
	 * @param	array	$idf_n	Count of documents each term occurs in
	 * @return	float		Similarity value (between 0.0 and 1.0)
	 */
	public static function compare($tfsA, $tfsB, $idf_N, $idf_n)
	{
		// Create vectors for both documents
		$vectorA = self::getVector($tfsA, $idf_N, $idf_n);
		$vectorB = self::getVector($tfsB, $idf_N, $idf_n);


		// Compare vectors
		return self::cosinus($vectorA, $vectorB);
	}




	/**
	 * Calculate the vector for a document based on TF and IDF.
	 *
	 * $tfs and $idf_n are expected to be associative arrays with the term
	 * as key and the corresponding frequency as value. The resulting
	 * vector is an associative array with the terms as keys and their
	 * corresponding values as value.
	 *
	 * @param	array	$tfs	Term Frequencies of document
	 * @param	int	$idf_N	Total count of documents in corpus
	 * @param	array	$idf_n	Count of documents each term occurs in
	 * @return	array		Document vector
	 */
	protected static function getVector($tfs, $idf_N, $idf_n)
	{
		// The vector components are the TF*IDF values
		return self::getTFIDFs($tfs, $idf_N, $idf_n);
	}


	/**
	 * Calculate TF*IDF values for a document.
	 *
	 * $tfs and $idf_n are expected to be associative arrays with the term
	 * as key and the corresponding frequency as value. The resulting
	 * value is an associative array with the terms as keys and their
	 * corresponding TF*IDF as values.
	 *
	 * IDF is calculated as log2(N / n). Terms without a (positive)
	 * document count are treated as occurring in one document. For an
	 * empty corpus (N <= 0) all values are 0.
	 *
	 * @param	array	$tfs	Term Frequencies of document
	 * @param	int	$idf_N	Total count of documents in corpus
	 * @param	array	$idf_n	Count of documents each term occurs in
	 * @return	array		TF*IDF values
	 */
	protected static function getTFIDFs($tfs, $idf_N, $idf_n)
	{
		// log(0) is not a usable weight: an empty corpus yields a zero vector
		if($idf_N <= 0) {
			return array_fill_keys(array_keys($tfs), 0.0);
		}

		// Calculate TF*IDF
		$tfidfs = array();
		foreach($tfs as $term => $tf)
		{
			// TODO Laplace norm: n = 1?
			$n = 1;
			if(array_key_exists($term, $idf_n) && $idf_n[$term] > 0) {
				$n = $idf_n[$term];
			}
			$tfidfs[$term] = $tf * log($idf_N / $n, 2);
		}


		return $tfidfs;
	}


	/**
	 * Calculate cosine similarity between two vectors.
	 *
	 * sim(a, b) = (a・b) / (||a|| * ||b||)
	 *
	 * If one of the vectors has no length, the similarity is 0.
	 *
	 * @param	array	$a	Vector A
	 * @param	array	$b	Vector B
	 * @return	float		Similarity value (between 0.0 and 1.0)
	 */
	protected static function cosinus(array $a, array $b)
	{
		$norms = self::norm($a) * self::norm($b);
		if($norms == 0) {
			return 0.0;
		}

		// Rounding errors may push the value of identical vectors
		// marginally above 1
		return min(1.0, self::dotProduct($a, $b) / $norms);
	}


	/**
	 * Calculate the dot-product for two vectors.
	 *
	 * a・b = summation{i=1,n}(a[i] * b[i])
	 *
	 * Only keys with a non-empty value in both vectors contribute.
	 *
	 * @param	array	$a	Vector A
	 * @param	array	$b	Vector B
	 * @return	float		Dot-product
	 */
	protected static function dotProduct(array $a, array $b)
	{
		$dotProduct = 0.0;
		foreach($a as $key => $value) {
			if(!empty($value) && !empty($b[$key])) {
				$dotProduct += ($value * $b[$key]);
			}
		}

		return $dotProduct;
	}


	/**
	 * Calculate the Euclidean norm for a vector.
	 *
	 * ||x|| = sqrt(x・x) // ・ is a dot product
	 *
	 * @param	array	$vector	Vector
	 * @return	float		Euclidean norm
	 */
	protected static function norm(array $vector)
	{
		return sqrt(self::dotProduct($vector, $vector));
	}

}
|
||||
|
||||
?>
|
|
@ -153,6 +153,10 @@
|
|||
array(
|
||||
'mimetype' => 'image/png',
|
||||
'size' => 1048576
|
||||
),
|
||||
array(
|
||||
'mimetype' => 'application/pdf',
|
||||
'size' => 1048576
|
||||
)
|
||||
),
|
||||
'map' => array(
|
||||
|
|
|
@ -1866,28 +1866,58 @@ CREATE TABLE `questtypes_submit_characters_comments` (
|
|||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `questtypes_submit_mimetypes`
|
||||
-- Table structure for table `questtypes_submit_similarities`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `questtypes_submit_mimetypes`;
|
||||
DROP TABLE IF EXISTS `questtypes_submit_similarities`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `questtypes_submit_mimetypes` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
CREATE TABLE `questtypes_submit_similarities` (
|
||||
`submission_id1` int(11) NOT NULL,
|
||||
`submission_id2` int(11) NOT NULL,
|
||||
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`created_user_id` int(11) NOT NULL,
|
||||
`seminary_id` int(11) NOT NULL,
|
||||
`mimetype` varchar(32) COLLATE utf8mb4_unicode_ci NOT NULL,
|
||||
`size` int(10) unsigned NOT NULL DEFAULT '0',
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `mimetype` (`mimetype`,`seminary_id`),
|
||||
KEY `created_user_id` (`created_user_id`),
|
||||
KEY `seminary_id` (`seminary_id`),
|
||||
CONSTRAINT `questtypes_submit_mimetypes_ibfk_1` FOREIGN KEY (`created_user_id`) REFERENCES `users` (`id`),
|
||||
CONSTRAINT `questtypes_submit_mimetypes_ibfk_2` FOREIGN KEY (`seminary_id`) REFERENCES `seminaries` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
|
||||
`similarity` decimal(10,9) NOT NULL,
|
||||
PRIMARY KEY (`submission_id1`,`submission_id2`),
|
||||
KEY `submission_id2` (`submission_id2`),
|
||||
CONSTRAINT `questtypes_submit_similarities_ibfk_1` FOREIGN KEY (`submission_id1`) REFERENCES `questtypes_submit_characters` (`id`) ON DELETE CASCADE ON UPDATE CASCADE,
|
||||
CONSTRAINT `questtypes_submit_similarities_ibfk_2` FOREIGN KEY (`submission_id2`) REFERENCES `questtypes_submit_characters` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `questtypes_submit_submissions_terms`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `questtypes_submit_submissions_terms`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `questtypes_submit_submissions_terms` (
|
||||
`submission_id` int(11) NOT NULL,
|
||||
`term_id` int(11) NOT NULL,
|
||||
`created` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
`tf` mediumint(8) unsigned NOT NULL DEFAULT '1',
|
||||
PRIMARY KEY (`submission_id`,`term_id`),
|
||||
KEY `term_id` (`term_id`),
|
||||
CONSTRAINT `questtypes_submit_submissions_terms_ibfk_1` FOREIGN KEY (`submission_id`) REFERENCES `questtypes_submit_characters` (`id`) ON DELETE CASCADE ON UPDATE CASCADE,
|
||||
CONSTRAINT `questtypes_submit_submissions_terms_ibfk_2` FOREIGN KEY (`term_id`) REFERENCES `questtypes_submit_terms` (`id`) ON DELETE CASCADE ON UPDATE CASCADE
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `questtypes_submit_terms`
|
||||
--
|
||||
|
||||
DROP TABLE IF EXISTS `questtypes_submit_terms`;
|
||||
/*!40101 SET @saved_cs_client = @@character_set_client */;
|
||||
/*!40101 SET character_set_client = utf8 */;
|
||||
CREATE TABLE `questtypes_submit_terms` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`term` varchar(9) COLLATE utf8mb4_unicode_ci NOT NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
UNIQUE KEY `term` (`term`)
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='Terms/N-grams';
|
||||
/*!40101 SET character_set_client = @saved_cs_client */;
|
||||
|
||||
--
|
||||
-- Table structure for table `questtypes_textinput`
|
||||
--
|
||||
|
@ -2663,4 +2693,4 @@ DELIMITER ;
|
|||
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
|
||||
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
|
||||
|
||||
-- Dump completed on 2016-03-26 19:13:31
|
||||
-- Dump completed on 2016-04-09 13:18:45
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
*
|
||||
* @var array
|
||||
*/
|
||||
public $models = array('quests', 'uploads', 'users');
|
||||
public $models = array('quests', 'uploads', 'users', 'characters', 'questgroups');
|
||||
|
||||
|
||||
|
||||
|
@ -210,6 +210,27 @@
|
|||
catch(\nre\exceptions\IdNotFoundException $e) {
|
||||
}
|
||||
}
|
||||
$submission['similar'] = $this->Submit->getSimilarSubmissions(
|
||||
$seminary['id'],
|
||||
$quest['id'],
|
||||
$character['id'],
|
||||
$submission['id']
|
||||
);
|
||||
foreach($submission['similar'] as &$similarSubmission)
|
||||
{
|
||||
$similarSubmission['quest'] = $this->Quests->getQuestById(
|
||||
$similarSubmission['quest_id']
|
||||
);
|
||||
$similarSubmission['questgroup'] = $this->Questgroups->getQuestgroupById(
|
||||
$similarSubmission['quest']['questgroup_id']
|
||||
);
|
||||
$similarSubmission['character'] = $this->Characters->getCharacterById(
|
||||
$similarSubmission['character_id']
|
||||
);
|
||||
$similarSubmission['upload'] = $this->Uploads->getSeminaryuploadById(
|
||||
$similarSubmission['upload_id']
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Status
|
||||
|
|
|
@ -19,6 +19,19 @@
|
|||
*/
|
||||
class SubmitQuesttypeModel extends \hhu\z\models\QuesttypeModel
|
||||
{
|
||||
/**
|
||||
* Minimum similarity value for two submissions
|
||||
*
|
||||
* @var float
|
||||
*/
|
||||
const SIMILARITY_MIN = 0.8;
|
||||
/**
|
||||
* Supported mimetypes
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
const mimetypes = array('application/pdf');
|
||||
|
||||
/**
|
||||
* Required models
|
||||
*
|
||||
|
@ -81,6 +94,12 @@
|
|||
$questId, $characterId, $uploadId
|
||||
);
|
||||
|
||||
// Index submission for similarity calculation
|
||||
$this->addDocument(
|
||||
$this->db->getInsertId(),
|
||||
ROOT.DS.\nre\configs\AppConfig::$dirs['seminaryuploads'].DS.$filename
|
||||
);
|
||||
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -131,18 +150,20 @@
|
|||
/**
 * Get allowed mimetypes for uploading a file.
 *
 * The mimetypes configured for Questtypes in the application
 * configuration are reduced to those listed in self::mimetypes.
 *
 * @param	int	$seminaryId	ID of Seminary (not evaluated, kept for compatibility with callers)
 * @return	array			Allowed mimetypes (entries as configured, presumably with “mimetype” and “size”)
 */
public function getAllowedMimetypes($seminaryId)
{
	$mimetypes = array();
	foreach(\nre\configs\AppConfig::$mimetypes['questtypes'] as $mimetype) {
		// Strict comparison: mimetypes are plain strings
		if(in_array($mimetype['mimetype'], self::mimetypes, true)) {
			$mimetypes[] = $mimetype;
		}
	}


	return $mimetypes;
}
|
||||
|
||||
|
||||
|
@ -163,6 +184,280 @@
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Get similar submissions for a Character submission.
 *
 * The given submission is compared to the submissions of other
 * Characters for the same Quest. Similarity values that have not been
 * stored yet are calculated and saved. Only submissions with a
 * similarity of at least self::SIMILARITY_MIN are returned.
 *
 * @param	int	$seminaryId	ID of Seminary
 * @param	int	$questId	ID of Quest
 * @param	int	$characterId	ID of Character
 * @param	int	$submissionId	ID of submission
 * @return	array			List of submissions, each including its “similarity”
 */
public function getSimilarSubmissions($seminaryId, $questId, $characterId, $submissionId)
{
	// List of submissions with high similarity
	$similarSubmissions = array();

	// IDFs and TFs of the given submission are only needed if a
	// similarity has to be calculated; they are loaded on first use to
	// avoid the corpus-wide queries when everything is already stored
	$idf_N = null;
	$idf_n = null;
	$tfsA = null;

	// Get submissions of same task
	$submissions = $this->getSubmissionsForQuest(
		$questId,
		$characterId,
		$submissionId
	);

	// Iterate through submissions of same task
	foreach($submissions as $submission)
	{
		// Check if similarity has already been calculated
		if(is_null($submission['similarity']))
		{
			if(is_null($tfsA))
			{
				// Get IDFs
				$idf_N = $this->getIDF_total($seminaryId);
				$idf_n = $this->getIDF_docs($seminaryId);

				// Get stored TFs of submission
				$tfsA = $this->getTFs($submissionId);
			}

			// Get stored TFs of submission to compare to
			$tfsB = $this->getTFs($submission['id']);

			// Calculate similarity
			$submission['similarity'] = \hhu\z\lib\Similarity::compare(
				$tfsA,
				$tfsB,
				$idf_N,
				$idf_n
			);

			// Save similarity
			$this->setSimilarity(
				$submissionId,
				$submission['id'],
				$submission['similarity']
			);
		}

		// Add high similarities to list
		if($submission['similarity'] >= self::SIMILARITY_MIN) {
			$similarSubmissions[] = $submission;
		}
	}


	return $similarSubmissions;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/**
 * Index a submission as document.
 *
 * The document is read, split into terms (N-grams) and the terms are
 * stored for the submission.
 *
 * @param	int	$submissionId	ID of submission
 * @param	string	$filename	Full file path of document to read
 * @return	boolean			True if the document has been indexed, false if it could not be read
 */
private function addDocument($submissionId, $filename)
{
	// Read document
	$document = \hhu\z\lib\Similarity::readDocument($filename);
	if($document === false) {
		return false;
	}

	// Split document into terms
	$terms = \hhu\z\lib\Similarity::splitNgrams($document);

	// Update global values
	$this->addTerms($submissionId, $terms);


	return true;
}
|
||||
|
||||
|
||||
/**
 * Add terms to the corpus, stored in database.
 *
 * Each distinct term is added to the list of terms (if not already
 * present) and linked to the submission with its frequency (TF). If a
 * link already exists, the frequency is added to the stored one.
 *
 * @param	int	$submissionId	ID of submission
 * @param	array	$terms		List of (non-unique) terms
 */
private function addTerms($submissionId, $terms)
{
	// Count occurrences in memory, so that the database is queried once
	// per distinct term instead of once per occurrence
	$tfs = array_count_values($terms);

	foreach($tfs as $term => $tf)
	{
		// PHP converts array keys that look like integers; terms are
		// always stored as strings
		$term = (string) $term;

		// Add term to database
		$this->db->query(
			'INSERT IGNORE INTO questtypes_submit_terms '.
			'(term) '.
			'VALUES '.
			'(?)',
			's',
			$term
		);

		// Link term to submission
		$this->db->query(
			'INSERT INTO questtypes_submit_submissions_terms '.
			'(submission_id, term_id, tf) '.
			'SELECT ?, questtypes_submit_terms.id, ? '.
			'FROM questtypes_submit_terms '.
			'WHERE term = ? '.
			'ON DUPLICATE KEY UPDATE '.
			'tf = tf + ?',
			'iisi',
			$submissionId,
			$tf,
			$term,
			$tf
		);
	}
}
|
||||
|
||||
|
||||
/**
 * Load the submissions of a Quest made by other Characters, together
 * with the stored similarity to a reference submission.
 *
 * Submissions of the given Character are left out. The similarity is
 * null for submissions without a stored value (LEFT JOIN).
 *
 * @param	int	$questId	ID of Quest
 * @param	int	$characterId	ID of Character to exclude submissions of
 * @param	int	$submissionId	ID of submission to get similarity values for
 * @return	array			List of submissions
 */
private function getSubmissionsForQuest($questId, $characterId, $submissionId)
{
	$sql =
		'SELECT questtypes_submit_characters.id, questtypes_submit_characters.created, questtypes_submit_characters.quest_id, character_id, upload_id, questtypes_submit_similarities.similarity '.
		'FROM questtypes_submit_characters '.
		'LEFT JOIN questtypes_submit_similarities ON questtypes_submit_similarities.submission_id1 = ? AND questtypes_submit_similarities.submission_id2 = questtypes_submit_characters.id '.
		'WHERE quest_id = ? AND character_id != ?';

	$result = $this->db->query($sql, 'iii', $submissionId, $questId, $characterId);


	return $result;
}
|
||||
|
||||
|
||||
/**
 * Load the stored Term Frequencies (TFs) of a submission.
 *
 * @param	int	$submissionId	ID of submission
 * @return	array			Map of term (key) to its frequency (value)
 */
private function getTFs($submissionId)
{
	$sql =
		'SELECT term, tf '.
		'FROM questtypes_submit_submissions_terms '.
		'INNER JOIN questtypes_submit_terms ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE submission_id = ?';
	$rows = $this->db->query($sql, 'i', $submissionId);

	// Re-key the rows by term
	$frequencies = array();
	foreach($rows as $row) {
		$frequencies[$row['term']] = $row['tf'];
	}


	return $frequencies;
}
|
||||
|
||||
|
||||
/**
 * Count all submissions made by Characters of a Seminary.
 *
 * @param	int	$seminaryId	ID of Seminary
 * @return	int			Total count of submissions (0 if the query yields no row)
 */
private function getIDF_total($seminaryId)
{
	$sql =
		'SELECT count(questtypes_submit_characters.id) as c '.
		'FROM charactertypes '.
		'INNER JOIN characters ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters ON questtypes_submit_characters.character_id = characters.id '.
		'WHERE charactertypes.seminary_id = ?';
	$rows = $this->db->query($sql, 'i', $seminaryId);


	return empty($rows) ? 0 : $rows[0]['c'];
}
|
||||
|
||||
|
||||
/**
 * Count, for every term, the submissions of a Seminary it occurs in.
 *
 * @param	int	$seminaryId	ID of Seminary
 * @return	array			Map of term (key) to count of submissions (value)
 */
private function getIDF_docs($seminaryId)
{
	$sql =
		'SELECT questtypes_submit_terms.term, count(*) AS c '.
		'FROM charactertypes '.
		'INNER JOIN characters ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters ON questtypes_submit_characters.character_id = characters.id '.
		'INNER JOIN questtypes_submit_submissions_terms ON questtypes_submit_submissions_terms.submission_id = questtypes_submit_characters.id '.
		'INNER JOIN questtypes_submit_terms ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE charactertypes.seminary_id = ? '.
		'GROUP BY questtypes_submit_terms.term';
	$rows = $this->db->query($sql, 'i', $seminaryId);

	// Re-key the rows by term
	$counts = array();
	foreach($rows as $row) {
		$counts[$row['term']] = $row['c'];
	}


	return $counts;
}
|
||||
|
||||
|
||||
/**
 * Store the similarity of a pair of submissions.
 *
 * The value is written for both orders of the pair, so that it can be
 * looked up starting from either submission. Existing values are
 * overwritten.
 *
 * @param	int	$submissionId1	ID of submission
 * @param	int	$submissionId2	ID of submission
 * @param	float	$similarity	Similarity of both submissions
 */
private function setSimilarity($submissionId1, $submissionId2, $similarity)
{
	$sql =
		'INSERT INTO questtypes_submit_similarities '.
		'(submission_id1, submission_id2, similarity) '.
		'VALUES '.
		'(?, ?, ?) '.
		'ON DUPLICATE KEY UPDATE '.
		'similarity = ?';

	// First (1, 2), then (2, 1)
	$pairs = array(
		array($submissionId1, $submissionId2),
		array($submissionId2, $submissionId1)
	);
	foreach($pairs as $pair) {
		$this->db->query(
			$sql,
			'iidd',
			$pair[0], $pair[1], $similarity,
			$similarity
		);
	}
}
|
||||
}
|
||||
|
||||
?>
|
||||
|
|
|
@ -16,6 +16,24 @@
|
|||
<?php endforeach ?>
|
||||
</ol>
|
||||
<?php endif ?>
|
||||
<?php /* Submissions of other Characters with a high similarity to this one */ ?>
<?php if(!empty($submission['similar'])) : ?>
<h4><?=_('Similar submissions')?></h4>
<ul>
	<?php foreach($submission['similar'] as $similar) : ?>
	<li>
		<p><small><?=_('Similarity')?>: <?=$numberFormatter->format($similar['similarity'])?></small></p>
		<p><a href="<?=$linker->link(array('uploads','seminary',$seminary['url'], $similar['upload']['url']))?>"><?=$similar['upload']['name']?></a></p>
		<p><small>
			<a href="<?=$linker->link(array('quests','submission',$seminary['url'],$similar['questgroup']['url'],$similar['quest']['url'],$similar['character']['url']))?>">
				<?=$similar['character']['name']?>,
				<?=$similar['quest']['title']?>
			</a>,
			<?=$dateFormatter->format(new \DateTime($similar['created']))?> <?=$timeFormatter->format(new \DateTime($similar['created']))?>
		</small></p>
	</li>
	<?php endforeach ?>
</ul>
<?php endif ?>
|
||||
</li>
|
||||
<?php endforeach ?>
|
||||
</ol>
|
||||
|
@ -24,10 +42,17 @@
|
|||
<form method="post" class="logreg">
|
||||
<?php $submission = array_pop($submissions); ?>
|
||||
<?php if(!$solved) : ?>
|
||||
<?=_('Comment')?><br />
|
||||
<textarea name="characterdata[comment]"></textarea><br />
|
||||
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
|
||||
<fieldset>
|
||||
<legend><?=_('Comment')?></legend>
|
||||
<textarea id="characterdata-comment" name="characterdata[comment]"></textarea><br />
|
||||
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
|
||||
</fieldset>
|
||||
<input type="submit" name="submit" value="<?=_('solved')?>" />
|
||||
<input type="submit" name="submit" value="<?=_('unsolved')?>" />
|
||||
<?php endif ?>
|
||||
</form>
|
||||
<script>
|
||||
$(function() {
|
||||
$("#characterdata-comment").markItUp(mySettings);
|
||||
});
|
||||
</script>
|
||||
|
|
|
@ -12,6 +12,7 @@ img{border:0}
|
|||
h1,h2,h3{color:#103a3e}
|
||||
h2{font-size:120%;margin-top:25px}
|
||||
h3{font-size:100%}
|
||||
h4{margin-bottom:0}
|
||||
ul,ol,nav{padding:0;margin-top:0;list-style-type:none}
|
||||
p{margin:0 0 16px;padding:0}
|
||||
audio,canvas,video{display:inline-block}
|
||||
|
|
Loading…
Add table
Reference in a new issue