implement similarity algorithm for questtype "Submit"

This commit is contained in:
oliver 2016-04-09 13:21:23 +02:00
commit 53fda5caaf
6 changed files with 533 additions and 5 deletions

View file

@ -24,7 +24,7 @@
*
* @var array
*/
public $models = array('quests', 'uploads', 'users');
public $models = array('quests', 'uploads', 'users', 'characters', 'questgroups');
@ -210,6 +210,27 @@
catch(\nre\exceptions\IdNotFoundException $e) {
}
}
$submission['similar'] = $this->Submit->getSimilarSubmissions(
$seminary['id'],
$quest['id'],
$character['id'],
$submission['id']
);
foreach($submission['similar'] as &$similarSubmission)
{
$similarSubmission['quest'] = $this->Quests->getQuestById(
$similarSubmission['quest_id']
);
$similarSubmission['questgroup'] = $this->Questgroups->getQuestgroupById(
$similarSubmission['quest']['questgroup_id']
);
$similarSubmission['character'] = $this->Characters->getCharacterById(
$similarSubmission['character_id']
);
$similarSubmission['upload'] = $this->Uploads->getSeminaryuploadById(
$similarSubmission['upload_id']
);
}
}
// Status

View file

@ -81,6 +81,12 @@
$questId, $characterId, $uploadId
);
// Index submission for similarity calculation
$this->addDocument(
$this->db->getInsertId(),
ROOT.DS.\nre\configs\AppConfig::$dirs['seminaryuploads'].DS.$filename
);
return true;
}
@ -163,6 +169,245 @@
);
}
/**
 * Find submissions of other Characters for the same Quest that are
 * highly similar to the given submission.
 *
 * Similarities that are not stored yet are calculated from the stored
 * term frequencies, saved for both directions and then reused on
 * later calls.
 *
 * @param int $seminaryId ID of the Seminary the IDF values are taken from
 * @param int $questId ID of the Quest whose submissions are compared
 * @param int $characterId ID of the Character whose own submissions are left out
 * @param int $submissionId ID of the submission to compare the others to
 * @param float $threshold Minimum similarity for a submission to be listed
 * @return array Submissions with a similarity of at least $threshold, each carrying a "similarity" entry
 */
public function getSimilarSubmissions($seminaryId, $questId, $characterId, $submissionId, $threshold=0.7)
{
	// Submissions of the same task, including stored similarities
	$submissions = $this->getSubmissionsForQuest(
		$questId,
		$characterId,
		$submissionId
	);

	// IDFs and TFs are only needed for similarities that are not
	// stored yet, so the queries behind them are run on first use
	$idf_N = null;
	$idf_n = null;
	$tfsA = null;

	// List of submissions with high similarity
	$similarSubmissions = array();
	foreach($submissions as $submission)
	{
		if(is_null($submission['similarity']))
		{
			if(is_null($tfsA))
			{
				// Get IDFs
				$idf_N = $this->getIDF_total($seminaryId);
				$idf_n = $this->getIDF_docs($seminaryId);

				// Get stored TFs of submission
				$tfsA = $this->getTFs($submissionId);
			}

			// Get stored TFs of submission to compare to
			$tfsB = $this->getTFs($submission['id']);

			// Calculate similarity
			$submission['similarity'] = \hhu\z\lib\Similarity::compare(
				$tfsA,
				$tfsB,
				$idf_N,
				$idf_n
			);

			// Save similarity
			$this->setSimilarity(
				$submissionId,
				$submission['id'],
				$submission['similarity']
			);
		}

		// Add high similarities to list
		if($submission['similarity'] >= $threshold) {
			$similarSubmissions[] = $submission;
		}
	}

	return $similarSubmissions;
}
/**
 * Index an uploaded document for the similarity calculation.
 *
 * The document is read, split into n-gram terms and the terms are
 * stored for the given submission.
 *
 * @param int $submissionId ID of the submission the document belongs to
 * @param string $filename Path of the document to read
 * @return bool False if the document could not be read, true otherwise
 */
private function addDocument($submissionId, $filename)
{
	// Read document
	$document = \hhu\z\lib\Similarity::readDocument($filename);
	if($document === false) {
		return false;
	}

	// Split document into terms
	$terms = \hhu\z\lib\Similarity::splitNgrams($document);

	// Update global values
	$this->addTerms($submissionId, $terms);

	// Report success explicitly instead of implicitly returning null
	return true;
}
/**
 * Store the terms of a submission together with their term
 * frequencies (TF).
 *
 * Every distinct term is added to the term table (if not already
 * present) and linked to the submission. The number of occurrences
 * of the term is added to the stored TF of that link.
 *
 * @param int $submissionId ID of the submission the terms belong to
 * @param array $terms List of terms, one entry per occurrence
 */
private function addTerms($submissionId, $terms)
{
	// Count occurrences per term. Array keys are matched exactly, so
	// numeric-looking terms like "0" and "00" stay distinct (a loose
	// in_array() comparison would treat them as equal).
	$tfs = array();
	foreach($terms as $term)
	{
		$term = (string) $term;
		if(isset($tfs[$term])) {
			$tfs[$term]++;
		}
		else {
			$tfs[$term] = 1;
		}
	}

	foreach($tfs as $term => $tf)
	{
		// PHP converts integer-like string keys to int, convert back
		$term = (string) $term;

		// Add term to database
		$this->db->query(
			'INSERT IGNORE INTO questtypes_submit_terms '.
			'(term) '.
			'VALUES '.
			'(?)',
			's',
			$term
		);

		// Link term to submission, adding all occurrences at once
		$this->db->query(
			'INSERT INTO questtypes_submit_submissions_terms '.
			'(submission_id, term_id, tf) '.
			'SELECT ?, questtypes_submit_terms.id, ? '.
			'FROM questtypes_submit_terms '.
			'WHERE term = ? '.
			'ON DUPLICATE KEY UPDATE '.
			'tf = tf + ?',
			'iisi',
			$submissionId, $tf,
			$term,
			$tf
		);
	}
}
/**
 * Get all submissions for a Quest that were not made by the given
 * Character, each with the stored similarity to the given submission.
 *
 * The similarity is joined with a LEFT JOIN, so it is NULL for
 * submissions that have no stored similarity yet.
 *
 * @param int $questId ID of the Quest to get submissions for
 * @param int $characterId ID of the Character whose submissions are excluded
 * @param int $submissionId ID of the submission the similarities refer to
 * @return array Result rows of the query
 */
private function getSubmissionsForQuest($questId, $characterId, $submissionId)
{
	$sql =
		'SELECT questtypes_submit_characters.id, '.
		'questtypes_submit_characters.created, '.
		'questtypes_submit_characters.quest_id, '.
		'character_id, upload_id, '.
		'questtypes_submit_similarities.similarity '.
		'FROM questtypes_submit_characters '.
		'LEFT JOIN questtypes_submit_similarities '.
		'ON questtypes_submit_similarities.submission_id1 = ? '.
		'AND questtypes_submit_similarities.submission_id2 = questtypes_submit_characters.id '.
		'WHERE quest_id = ? AND character_id != ?';

	// Parameter order follows the placeholders: JOIN first, then WHERE
	$result = $this->db->query($sql, 'iii', $submissionId, $questId, $characterId);

	return $result;
}
/**
 * Get the stored term frequencies (TF) of a submission.
 *
 * @param int $submissionId ID of the submission to read the terms of
 * @return array Map of term => TF
 */
private function getTFs($submissionId)
{
	$sql =
		'SELECT term, tf '.
		'FROM questtypes_submit_submissions_terms '.
		'INNER JOIN questtypes_submit_terms '.
		'ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE submission_id = ?';
	$rows = $this->db->query($sql, 'i', $submissionId);

	// Re-key the result rows by term
	$frequencies = array();
	foreach($rows as $row) {
		$frequencies[$row['term']] = $row['tf'];
	}

	return $frequencies;
}
/**
 * Get the total count of submissions of a Seminary (the "N" of the
 * IDF calculation).
 *
 * @param int $seminaryId ID of the Seminary to count submissions for
 * @return mixed Count as returned by the database, or 0 if the query yields no row
 */
private function getIDF_total($seminaryId)
{
	$sql =
		'SELECT count(questtypes_submit_characters.id) as c '.
		'FROM charactertypes '.
		'INNER JOIN characters '.
		'ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters '.
		'ON questtypes_submit_characters.character_id = characters.id '.
		'WHERE charactertypes.seminary_id = ?';
	$result = $this->db->query($sql, 'i', $seminaryId);

	// Fall back to 0 when there is no result row
	return empty($result) ? 0 : $result[0]['c'];
}
/**
 * Get the count of submissions each term is in, per Seminary (the
 * "n" of the IDF calculation).
 *
 * @param int $seminaryId ID of the Seminary to count for
 * @return array Map of term => count
 */
private function getIDF_docs($seminaryId)
{
	$sql =
		'SELECT questtypes_submit_terms.term, count(*) AS c '.
		'FROM charactertypes '.
		'INNER JOIN characters '.
		'ON characters.charactertype_id = charactertypes.id '.
		'INNER JOIN questtypes_submit_characters '.
		'ON questtypes_submit_characters.character_id = characters.id '.
		'INNER JOIN questtypes_submit_submissions_terms '.
		'ON questtypes_submit_submissions_terms.submission_id = questtypes_submit_characters.id '.
		'INNER JOIN questtypes_submit_terms '.
		'ON questtypes_submit_terms.id = questtypes_submit_submissions_terms.term_id '.
		'WHERE charactertypes.seminary_id = ? '.
		'GROUP BY questtypes_submit_terms.term';
	$rows = $this->db->query($sql, 'i', $seminaryId);

	// Re-key the grouped rows by term
	$counts = array();
	foreach($rows as $row) {
		$counts[$row['term']] = $row['c'];
	}

	return $counts;
}
/**
 * Store the similarity of two submissions.
 *
 * The value is written for both orderings of the two IDs (1 => 2 and
 * 2 => 1); an existing entry is overwritten.
 *
 * @param int $submissionId1 ID of the first submission
 * @param int $submissionId2 ID of the second submission
 * @param float $similarity Similarity value to store
 */
private function setSimilarity($submissionId1, $submissionId2, $similarity)
{
	$sql =
		'INSERT INTO questtypes_submit_similarities '.
		'(submission_id1, submission_id2, similarity) '.
		'VALUES '.
		'(?, ?, ?) '.
		'ON DUPLICATE KEY UPDATE '.
		'similarity = ?';

	// Same statement for both directions, original order first
	$directions = array(
		array($submissionId1, $submissionId2),
		array($submissionId2, $submissionId1)
	);
	foreach($directions as $direction)
	{
		list($fromId, $toId) = $direction;
		$this->db->query(
			$sql,
			'iidd',
			$fromId, $toId, $similarity,
			$similarity
		);
	}
}
}
?>

View file

@ -16,6 +16,24 @@
<?php endforeach ?>
</ol>
<?php endif ?>
<?php if(!empty($submission['similar'])) : ?>
<?php /* Submissions with a high similarity to this one: one list, one item per submission */ ?>
<h4><?=_('Similar submissions')?></h4>
<ul>
	<?php foreach($submission['similar'] as $similar) : ?>
	<li>
		<p><small><?=_('Similarity')?>: <?=$numberFormatter->format($similar['similarity'])?></small></p>
		<p><a href="<?=$linker->link(array('uploads','seminary',$seminary['url'], $similar['upload']['url']))?>"><?=$similar['upload']['name']?></a></p>
		<p><small>
			<a href="<?=$linker->link(array('quests','submission',$seminary['url'],$similar['questgroup']['url'],$similar['quest']['url'],$similar['character']['url']))?>">
				<?=$similar['character']['name']?>,
				<?=$similar['quest']['title']?>
			</a>,
			<?=$dateFormatter->format(new \DateTime($similar['created']))?> <?=$timeFormatter->format(new \DateTime($similar['created']))?>
		</small></p>
	</li>
	<?php endforeach ?>
</ul>
<?php endif ?>
</li>
<?php endforeach ?>
</ol>
@ -24,10 +42,17 @@
<form method="post" class="logreg">
<?php $submission = array_pop($submissions); ?>
<?php if(!$solved) : ?>
<?=_('Comment')?><br />
<textarea name="characterdata[comment]"></textarea><br />
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
<fieldset>
<legend><?=_('Comment')?></legend>
<textarea id="characterdata-comment" name="characterdata[comment]"></textarea><br />
<input type="hidden" name="characterdata[submission_id]" value="<?=$submission['id']?>" />
</fieldset>
<input type="submit" name="submit" value="<?=_('solved')?>" />
<input type="submit" name="submit" value="<?=_('unsolved')?>" />
<?php endif ?>
</form>
<script>
$(function() {
$("#characterdata-comment").markItUp(mySettings);
});
</script>