可信度加權決策賦予新聞可信度的可行性評估

說明

這是部落格文章『可信度加權決策賦予新聞可信度的可行性評估』的實作程式碼;如想瞭解更詳細的說明,請參考該文。

摘要

本文將探討可信度加權決策(Believability Weighted Decision Making)賦予新聞可信度的可行性評估,以及當其中有不公正參與者時,對於可信度加權決策後的結果與參與者本身的權重會有何種影響。

變數定義

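| 變數名稱 | 中文 | 說明 |
| --- | --- | --- |
| reporter | 報導者 | |
| news | 新聞 | |
| reader | 讀者 | |
| review | 評分 | 對新聞打分數 |
| reviewer | 評分者 | 對新聞打分數的讀者 |
| judge | 評價 | 對評分打分數 |
| judger | 評價者 | 對評分打分數的讀者 |
| score | 分數 | 即為可信度,同 weight |
| weight | 權重 | 即為可信度,同 score |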

測試資料

初始化

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sizes shared by every simulation cell in this notebook.
num = dict(
    # number of reporters
    reporter=8,
    # number of news items each reporter produces
    news_per_reporter=40,
    # number of readers
    reader=20,
    # how many past weights are kept per reader
    reader_pass_weight_window=10,
)

讀者

In [2]:
def get_readers(n):
    """Return a one-column reader table whose ``reader_id`` runs from 0 to n-1."""
    return pd.DataFrame({'reader_id': np.arange(n)})

get_readers(num['reader']).head()
Out[2]:
reader_id
0 0
1 1
2 2
3 3
4 4

讀者現有權重

In [3]:
%run src/scale.py
%run src/reader_weight_holder.py

ReaderWeightHolder(get_readers(num['reader']), Scale().mean, num['reader_pass_weight_window']).print_weights()
Out[3]:
weight
user_id
0 5.5
1 5.5
2 5.5
3 5.5
4 5.5

讀者過去權重

In [4]:
%run src/scale.py
%run src/reader_weight_holder.py

ReaderWeightHolder(get_readers(num['reader']), Scale().mean, num['reader_pass_weight_window']).print_past_weights()
Out[4]:
0 1 2 3 4 5 6 7 8 9
user_id
0 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5
1 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5
2 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5
3 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5
4 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5 5.5

報導者

In [5]:
%run src/helper.py

def get_reporters(n, scale):
    """Create ``n`` reporters with a random credibility.

    Each reporter gets a standard-normal z-score, which ``scale.translateZ``
    converts into a score on the rating scale. The resulting frame is passed
    through ``index_as_id(..., 'reporter')`` (from src/helper.py; judging by
    the notebook output it exposes the index as a ``reporter_id`` column).
    """
    # One z-score per reporter, drawn from N(0, 1).
    z = np.random.randn(n)

    # Pin reporter 0 to a very low credibility; it is the row that
    # get_target_reporters() selects as the unfair readers' target.
    z[0] = -2.5

    frame = pd.DataFrame({
        # z-scores converted to actual scores
        'reporter_score': list(map(scale.translateZ, z)),
        'reporter_z_scores': z,
    })

    return index_as_id(frame, 'reporter')

# Preview the reporter table, sized by the reporter count (not the reader count).
get_reporters(num['reporter'], Scale()).head()
Out[5]:
reporter_id reporter_score reporter_z_scores
0 0 1.750000 -2.500000
1 1 4.341668 -0.772221
2 2 8.940270 2.293513
3 3 5.883301 0.255534
4 4 4.391794 -0.738804

新聞

In [6]:
%run src/scale.py
%run src/helper.py

def get_news(per_reporter, scale, reporters):
    """Generate ``per_reporter`` news items for every reporter.

    The reporter table is repeated ``per_reporter`` times, so ids cycle
    through all reporters before starting over. A news item's z-score is the
    reporter's z-score plus standard-normal noise, i.e. it is drawn from
    N(reporter z-score, 1); ``scale.translateZ`` turns it into the item's
    true credibility score.
    """
    # Reporter id and reporter z-score for every news row.
    owner_ids = reporters['reporter_id'].tolist() * per_reporter
    owner_z = reporters['reporter_z_scores'].tolist() * per_reporter

    # Noise around the reporter's own credibility.
    noise = np.random.randn(len(owner_ids))
    item_z = noise + owner_z

    frame = pd.DataFrame({
        'reporter_id': owner_ids,
        # true credibility score of each news item
        'news_score': list(map(scale.translateZ, item_z)),
        'news_z_score': item_z,
    })

    return index_as_id(frame, 'news')

def test_get_news():
    """Build a sample news table from freshly generated reporters (demo helper)."""
    scale = Scale()
    return get_news(
        num['news_per_reporter'],
        scale,
        get_reporters(num['reporter'], scale),
    )

test_get_news().head()
Out[6]:
news_id reporter_id news_score news_z_score
0 0 0 1.853595 -2.430937
1 1 1 5.298411 -0.134393
2 2 2 7.189510 1.126340
3 3 3 3.218423 -1.521051
4 4 4 2.650994 -1.899337

評分結果

In [7]:
%run src/scale.py
%run src/helper.py

def get_reviews(scale, reviewers, news):
    """Have every reviewer score every news item.

    One row is produced per (reviewer, news) pair, reviewers in the outer
    loop. A review's z-score is the news item's z-score plus standard-normal
    noise, and ``scale.translateZ`` converts it to the review score.
    """
    columns = ['reviewer_id', 'news_id', 'reporter_id', 'news_score', 'news_z_score']

    # Cross product: each reviewer against each news item.
    rows = []
    for reviewer_id in reviewers['reader_id']:
        for item in news.itertuples(index = False):
            rows.append([reviewer_id, item.news_id, item.reporter_id, item.news_score, item.news_z_score])
    reviews = pd.DataFrame(rows, columns = columns)

    # A review is correlated with the credibility of the news it rates.
    noise = np.random.randn(len(reviews))
    reviews['review_z_score'] = noise + reviews['news_z_score']
    reviews['review_score'] = [scale.translateZ(z) for z in reviews['review_z_score']]

    return index_as_id(reviews, 'review')

def test_get_reviews():
    """Build a sample review table from freshly generated inputs (demo helper)."""
    scale = Scale()
    readers = get_readers(num['reader'])
    news = get_news(
        num['news_per_reporter'],
        scale,
        get_reporters(num['reporter'], scale),
    )
    return get_reviews(scale, readers, news)

test_get_reviews().head()
Out[7]:
review_id reviewer_id news_id reporter_id news_score news_z_score review_z_score review_score
0 0 0 0 0 1.000000 -3.898062 -3.225859 1.000000
1 1 0 1 1 4.820628 -0.452915 -0.839662 4.240507
2 2 0 2 2 5.404907 -0.063395 -0.426017 4.860975
3 3 0 3 3 6.460432 0.640288 0.714646 6.571969
4 4 0 4 4 5.243977 -0.170682 0.259754 5.889631

評價結果

In [8]:
%run src/scale.py
%run src/helper.py

def get_judges(scale, reviewers, reviews, news):
    """Have every reader judge every review.

    One row is produced per (review, judger) pair, reviews in the outer loop.
    The judgement score is ``scale.max - |review_score - news_score|``: the
    closer a review is to the news item's true score, the higher it is
    judged. ``news`` is accepted but not read here.
    """
    columns = ['review_id', 'review_score', 'news_score', 'reporter_id', 'judger_id']

    # Cross product: each review against each judger.
    rows = []
    for review in reviews.itertuples(index = False):
        for judger_id in reviewers['reader_id']:
            rows.append([review.review_id, review.review_score, review.news_score, review.reporter_id, judger_id])
    judges = pd.DataFrame(rows, columns = columns)

    # Distance between the review and the true score, turned into a reward.
    distance = (judges['review_score'] - judges['news_score']).abs()
    judges['judge_score'] = scale.max - distance

    return index_as_id(judges, 'judge')

def test_get_judges():
    """Build a sample judgement table from freshly generated inputs (demo helper)."""
    scale = Scale()
    readers = get_readers(num['reader'])
    news = get_news(
        num['news_per_reporter'],
        scale,
        get_reporters(num['reporter'], scale),
    )
    reviews = get_reviews(scale, readers, news)
    return get_judges(scale, readers, reviews, news)

test_get_judges().head()
Out[8]:
judge_id review_id review_score news_score reporter_id judger_id judge_score
0 0 0 5.259189 4.031518 0 0 8.772329
1 1 0 5.259189 4.031518 0 1 8.772329
2 2 0 5.259189 4.031518 0 2 8.772329
3 3 0 5.259189 4.031518 0 3 8.772329
4 4 0 5.259189 4.031518 0 4 8.772329

合併資料

In [9]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def get_data(news, reviews, judges, reader_weight_holder):
    """Join judgements, reviews and news into one flat table for later steps.

    Args:
        news: news table; joined on its index against ``news_id``.
        reviews: review table; joined on its index against ``review_id``.
        judges: judgement table (one row per review/judger pair).
        reader_weight_holder: object whose ``get(reader_id)`` returns the
            current weight of a reader.

    Returns:
        A new, independent DataFrame with one row per judgement, a
        ``judger_weight`` column, and self-judgements (judger == reviewer)
        removed. It is a copy, so callers may add columns to it in place.
    """

    def new_columns(existing, incoming):
        # Only bring over columns the left-hand side does not already have,
        # so the merge never produces _x/_y duplicates.
        return incoming.columns.difference(existing.columns)

    data = judges.merge(reviews[new_columns(judges, reviews)], left_on = 'review_id', right_index = True)
    data = data.merge(news[new_columns(data, news)], left_on = 'news_id', right_index = True)
    data['judger_weight'] = [reader_weight_holder.get(judger_id) for judger_id in data['judger_id']]

    # Drop rows where a reader judges their own review. The explicit copy
    # keeps the later in-place column assignment in update_reader_weight()
    # from acting on a slice (SettingWithCopyWarning).
    not_self_judged = data['judger_id'] != data['reviewer_id']
    return data[not_self_judged].copy()

def test_get_data():
    """Build a sample merged table from freshly generated inputs (demo helper)."""
    scale = Scale()
    readers = get_readers(num['reader'])
    news = get_news(
        num['news_per_reporter'],
        scale,
        get_reporters(num['reporter'], scale),
    )
    reviews = get_reviews(scale, readers, news)
    judges = get_judges(scale, readers, reviews, news)

    return get_data(news, reviews, judges, ReaderWeightHolder(readers, scale.mean))
    
test_get_data().head()
Out[9]:
judge_id review_id review_score news_score reporter_id judger_id judge_score news_id news_z_score review_z_score reviewer_id judger_weight
1 1 0 3.82536 3.311498 0 1 9.486137 0 -1.459002 -1.116426 0 5.5
2 2 0 3.82536 3.311498 0 2 9.486137 0 -1.459002 -1.116426 0 5.5
3 3 0 3.82536 3.311498 0 3 9.486137 0 -1.459002 -1.116426 0 5.5
4 4 0 3.82536 3.311498 0 4 9.486137 0 -1.459002 -1.116426 0 5.5
5 5 0 3.82536 3.311498 0 5 9.486137 0 -1.459002 -1.116426 0 5.5

計算可信度 / 權重流程

從評價分數計算讀者的權重

In [10]:
def get_reviewer_weights(data):
    """Return each reviewer's new weight, indexed by ``reviewer_id``.

    A reviewer's weight is the average of the judgement scores their reviews
    received, weighted by the weight of the judger who gave each judgement.
    """

    def weighted_judge_mean(group):
        return np.average(group['judge_score'], weights = group['judger_weight'])

    return data.groupby('reviewer_id').apply(weighted_judge_mean)

get_reviewer_weights(test_get_data()).head()
Out[10]:
reviewer_id
0    8.970573
1    8.954819
2    8.948805
3    8.953572
4    8.909166
dtype: float64

更新讀者權重

In [11]:
def update_reader_weight(data, reader_weight_holder):
    """Add a ``reviewer_weight`` column to ``data`` in place and return ``data``.

    The weight of each row's reviewer is looked up through
    ``reader_weight_holder.get``; keeping it on the table makes the weighted
    averages downstream straightforward.
    """
    lookup = reader_weight_holder.get
    data['reviewer_weight'] = list(map(lookup, data['reviewer_id']))
    return data

從評分分數計算新聞和報導者的可信度

In [12]:
def review_averaging(x):
    """Plain mean of the review scores in ``x``."""
    return np.average(x['review_score'])


def review_weight_averaging(x):
    """Mean of the review scores in ``x``, weighted by ``reviewer_weight``."""
    return np.average(x['review_score'], weights = x.reviewer_weight)


def get_score(data, key, weighted = False):
    """Average the review scores per group of ``key``.

    With ``weighted`` set, each review counts in proportion to its
    reviewer's weight; otherwise all reviews count equally.
    """
    if weighted:
        aggregate = review_weight_averaging
    else:
        aggregate = review_averaging
    return data.groupby(key).apply(aggregate)

def get_news_score(data, weighted = False):
    """Credibility (weight) of each news item: its review scores averaged per ``news_id``."""
    return get_score(data, key = 'news_id', weighted = weighted)

def get_reporter_score(data, weighted = False):
    """Credibility (weight) of each reporter: review scores averaged per ``reporter_id``."""
    return get_score(data, key = 'reporter_id', weighted = weighted)

公正的讀者

In [13]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate(times, scale, readers, reporters):
    """Run ``times`` rounds with only fair readers, then draw the result charts.

    Every round generates fresh news, reviews and judgements, derives each
    reviewer's weight from the judgements and inserts it into the weight
    holder. The charts use the data of the last round, so ``times`` must be
    at least 1 (with 0 rounds ``data`` is never assigned).
    """
    holder = ReaderWeightHolder(readers, scale.mean)

    for _ in range(times):
        round_news = get_news(num['news_per_reporter'], scale, reporters)
        round_reviews = get_reviews(scale, readers, round_news)
        round_judges = get_judges(scale, readers, round_reviews, round_news)
        data = get_data(round_news, round_reviews, round_judges, holder)

        # Insert the new weights first; update_reader_weight() reads the
        # holder afterwards when it tags each row with its reviewer's weight.
        holder.inserts(get_reviewer_weights(data).to_dict())
        update_reader_weight(data, holder)

    plain_scores = get_reporter_score(data)
    weighted_scores = get_reporter_score(data, weighted = True)

    draw_reporter_scores(scale, data, reporters, plain_scores, weighted_scores)
    draw_user_weights(scale, holder.weights.values())

def run_simulate(times):
    """Set up readers and reporters from ``num`` and run the fair-reader simulation."""
    scale = Scale()

    simulate(
        times = times,
        scale = scale,
        readers = get_readers(num['reader']),
        reporters = get_reporters(num['reporter'], scale),
    )

run_simulate(10)

不公正的讀者

In [14]:
def get_unfair_readers(readers, percent = 0.2):
    """Randomly pick a ``percent`` fraction of the readers to act unfairly.

    Prints a one-line summary of who was picked and returns the picked rows
    sorted by ``reader_id``.
    """
    picked = readers.sample(frac = percent)
    picked = picked.sort_values(by = 'reader_id')

    summary = '%d out of %d unfair readers with frac %.2f:' % (len(picked), len(readers), percent)
    print(summary, list(picked['reader_id']))

    return picked

def get_unfair_scores():
    """Return the scores an unfair reader chooses from (a fresh list on every call)."""
    extreme_scores = [9, 10]
    return extreme_scores

def get_target_reporters(reporters, percent = 0.2):
    """Return the reporters the unfair readers favour: always just the first row.

    ``percent`` is accepted but not used.
    """
    return reporters.head(1)

評分不公正,評價公正

In [15]:
def fill_unfair_reviews(reviews, readers, reporters, unfair_readers, unfair_scores, target_reporters):
    """Overwrite the reviews unfair readers gave to news by the targeted reporters.

    Every such review gets a score drawn at random from ``unfair_scores``,
    regardless of the news item's quality. ``reviews`` is modified in place
    and returned; ``readers`` and ``reporters`` are accepted but not read.
    """
    by_unfair_reader = reviews['reviewer_id'].isin(unfair_readers['reader_id'])
    about_target = reviews['reporter_id'].isin(target_reporters['reporter_id'])
    biased_rows = by_unfair_reader & about_target

    # One random biased score per affected review.
    how_many = len(reviews[biased_rows])
    reviews.loc[biased_rows, 'review_score'] = np.random.choice(unfair_scores, how_many)

    return reviews
In [16]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate_unfair_review(times, scale, unfair_scores, readers, unfair_readers, reporters, target_reporters):
    """Run ``times`` rounds where unfair readers bias their reviews only.

    Reviews that unfair readers give to the targeted reporters' news are
    overwritten with ``unfair_scores``; judgements stay fair. The charts use
    the data of the last round, so ``times`` must be at least 1.
    """
    holder = ReaderWeightHolder(readers, scale.mean)

    for _ in range(times):
        round_news = get_news(num['news_per_reporter'], scale, reporters)
        round_reviews = get_reviews(scale, readers, round_news)
        round_reviews = fill_unfair_reviews(round_reviews, readers, reporters, unfair_readers, unfair_scores, target_reporters)
        round_judges = get_judges(scale, readers, round_reviews, round_news)
        data = get_data(round_news, round_reviews, round_judges, holder)

        # Insert the new weights first; update_reader_weight() reads the
        # holder afterwards when it tags each row with its reviewer's weight.
        holder.inserts(get_reviewer_weights(data).to_dict())
        update_reader_weight(data, holder)

    plain_scores = get_reporter_score(data)
    weighted_scores = get_reporter_score(data, weighted = True)

    draw_reporter_scores(scale, data, reporters, plain_scores, weighted_scores)
    draw_user_weights(scale, holder.weights.values())

def run_simulate_unfair_review(times, unfair_reader_percent = 0.4):
    """Set up the population and run the biased-review simulation.

    ``unfair_reader_percent`` is the fraction of readers chosen to be unfair.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    simulate_unfair_review(
        times = times,
        scale = scale,
        unfair_scores = get_unfair_scores(),
        readers = readers,
        unfair_readers = get_unfair_readers(readers, unfair_reader_percent),
        reporters = reporters,
        target_reporters = get_target_reporters(reporters),
    )
    
run_simulate_unfair_review(10)
8 out of 20 unfair readers with frac 0.40: [0, 4, 6, 7, 9, 11, 13, 15]

評分公正,評價不公正

In [17]:
def fill_unfair_judges(judges, readers, reporters, unfair_readers, target_reporters, scale):
    """Overwrite the judgements unfair readers gave on reviews of targeted reporters.

    A review scoring at or above ``scale.mean`` is rewarded with an extreme
    high judgement taken from get_unfair_scores(); a lower review is punished
    with the mirrored low value ``scale.max - score + 1``. ``judges`` is
    modified in place and returned; ``readers`` and ``reporters`` are
    accepted but not read.
    """
    by_unfair_judger = judges['judger_id'].isin(unfair_readers['reader_id'])
    about_target = judges['reporter_id'].isin(target_reporters['reporter_id'])
    biased_rows = by_unfair_judger & about_target

    # One random draw per affected judgement, then praise or suppress.
    replacement = []
    for row in judges[biased_rows].itertuples():
        extreme = np.random.choice(get_unfair_scores())
        if row.review_score >= scale.mean:
            replacement.append(extreme)
        else:
            replacement.append(scale.max - extreme + 1)

    judges.loc[biased_rows, 'judge_score'] = replacement

    return judges
In [18]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate_unfair_judges(times, scale, readers, unfair_readers, reporters, target_reporters):
    """Run ``times`` rounds where unfair readers bias their judgements only.

    Reviews stay fair; judgements that unfair readers give on reviews of the
    targeted reporters' news are overwritten by fill_unfair_judges(). The
    charts use the data of the last round, so ``times`` must be at least 1.
    """
    holder = ReaderWeightHolder(readers, scale.mean)

    for _ in range(times):
        round_news = get_news(num['news_per_reporter'], scale, reporters)
        round_reviews = get_reviews(scale, readers, round_news)
        round_judges = get_judges(scale, readers, round_reviews, round_news)
        round_judges = fill_unfair_judges(round_judges, readers, reporters, unfair_readers, target_reporters, scale)
        data = get_data(round_news, round_reviews, round_judges, holder)

        # Insert the new weights first; update_reader_weight() reads the
        # holder afterwards when it tags each row with its reviewer's weight.
        holder.inserts(get_reviewer_weights(data).to_dict())
        update_reader_weight(data, holder)

    plain_scores = get_reporter_score(data)
    weighted_scores = get_reporter_score(data, weighted = True)

    draw_reporter_scores(scale, data, reporters, plain_scores, weighted_scores)
    draw_user_weights(scale, holder.weights.values())

def run_simulate_unfair_judges(times, unfair_reader_percent = 0.4):
    """Set up the population and run the biased-judgement simulation.

    ``unfair_reader_percent`` is the fraction of readers chosen to be unfair.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    simulate_unfair_judges(
        times = times,
        scale = scale,
        readers = readers,
        unfair_readers = get_unfair_readers(readers, unfair_reader_percent),
        reporters = reporters,
        target_reporters = get_target_reporters(reporters),
    )

run_simulate_unfair_judges(10)
8 out of 20 unfair readers with frac 0.40: [3, 5, 6, 9, 10, 11, 13, 17]

評分和評價都不公正

In [19]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate_unfair_review_and_judge(times, scale, unfair_scores, readers, unfair_readers, reporters, target_reporters, drawing = True):
    """Run ``times`` rounds where unfair readers bias both reviews and judgements.

    Returns ``(data, reporters)``: the merged table of the last round and the
    reporters that were passed in. Charts are drawn only when ``drawing`` is
    true. ``times`` must be at least 1, otherwise ``data`` is never assigned.
    """
    holder = ReaderWeightHolder(readers, scale.mean)

    for _ in range(times):
        round_news = get_news(num['news_per_reporter'], scale, reporters)
        round_reviews = get_reviews(scale, readers, round_news)
        round_reviews = fill_unfair_reviews(round_reviews, readers, reporters, unfair_readers, unfair_scores, target_reporters)
        round_judges = get_judges(scale, readers, round_reviews, round_news)
        round_judges = fill_unfair_judges(round_judges, readers, reporters, unfair_readers, target_reporters, scale)
        data = get_data(round_news, round_reviews, round_judges, holder)

        # Insert the new weights first; update_reader_weight() reads the
        # holder afterwards when it tags each row with its reviewer's weight.
        holder.inserts(get_reviewer_weights(data).to_dict())
        update_reader_weight(data, holder)

    if not drawing:
        return data, reporters

    plain_scores = get_reporter_score(data)
    weighted_scores = get_reporter_score(data, weighted = True)

    draw_reporter_scores(scale, data, reporters, plain_scores, weighted_scores)
    draw_user_weights(scale, holder.weights.values())

    return data, reporters

def run_simulate_unfair_review_and_judge(times, unfair_reader_percent = 0.4):
    """Set up the population and run the biased-review-and-judgement simulation.

    ``unfair_reader_percent`` is the fraction of readers chosen to be unfair.
    The simulation's return value is not needed here and is discarded.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    _data, _reporters = simulate_unfair_review_and_judge(
        times = times,
        scale = scale,
        readers = readers,
        unfair_scores = get_unfair_scores(),
        unfair_readers = get_unfair_readers(readers, unfair_reader_percent),
        reporters = reporters,
        target_reporters = get_target_reporters(reporters),
    )
    
run_simulate_unfair_review_and_judge(10)
8 out of 20 unfair readers with frac 0.40: [3, 4, 6, 9, 14, 15, 17, 19]

極度不公正的讀者

In [20]:
def remove_fair_reviews(reviews, unfair_readers, target_reporters):
    """Drop the fair reviews written by unfair readers.

    These are the reviews an unfair reader gave to news by reporters other
    than the targeted ones. Returns a new frame; ``reviews`` is not modified.
    """
    by_unfair_reader = reviews['reviewer_id'].isin(unfair_readers['reader_id'])
    about_target = reviews['reporter_id'].isin(target_reporters['reporter_id'])

    # Written by an unfair reader, but not about a targeted reporter.
    to_remove = reviews.index[by_unfair_reader & ~about_target]

    return reviews.drop(index = to_remove)

def remove_fair_judges(judges, unfair_readers, target_reporters):
    """Drop the fair judgements made by unfair readers.

    These are the judgements an unfair reader gave on reviews of news by
    reporters other than the targeted ones. Returns a new frame; ``judges``
    is not modified.
    """
    by_unfair_judger = judges['judger_id'].isin(unfair_readers['reader_id'])
    about_target = judges['reporter_id'].isin(target_reporters['reporter_id'])

    # Made by an unfair reader, but not about a targeted reporter.
    to_remove = judges.index[by_unfair_judger & ~about_target]

    return judges.drop(index = to_remove)
In [21]:
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate_extremely_unfair_review_and_judge(times, scale, unfair_scores, readers, unfair_readers, reporters, target_reporters, drawing = True):
    """Run ``times`` rounds where unfair readers contribute nothing but biased input.

    Their reviews and judgements concerning the targeted reporters are
    overwritten with biased values, and everything else they submitted is
    removed. Returns ``(data, reporters)``: the merged table of the last
    round and the reporters that were passed in. Charts are drawn only when
    ``drawing`` is true. ``times`` must be at least 1, otherwise ``data`` is
    never assigned.
    """
    holder = ReaderWeightHolder(readers, scale.mean)

    for _ in range(times):
        round_news = get_news(num['news_per_reporter'], scale, reporters)

        # Reviews: bias the targeted ones, then drop the unfair readers' fair ones.
        round_reviews = get_reviews(scale, readers, round_news)
        round_reviews = fill_unfair_reviews(round_reviews, readers, reporters, unfair_readers, unfair_scores, target_reporters)
        round_reviews = remove_fair_reviews(round_reviews, unfair_readers, target_reporters)

        # Judgements: same treatment.
        round_judges = get_judges(scale, readers, round_reviews, round_news)
        round_judges = fill_unfair_judges(round_judges, readers, reporters, unfair_readers, target_reporters, scale)
        round_judges = remove_fair_judges(round_judges, unfair_readers, target_reporters)

        data = get_data(round_news, round_reviews, round_judges, holder)

        # Insert the new weights first; update_reader_weight() reads the
        # holder afterwards when it tags each row with its reviewer's weight.
        holder.inserts(get_reviewer_weights(data).to_dict())
        update_reader_weight(data, holder)

    if not drawing:
        return data, reporters

    plain_scores = get_reporter_score(data)
    weighted_scores = get_reporter_score(data, weighted = True)

    draw_reporter_scores(scale, data, reporters, plain_scores, weighted_scores)
    draw_user_weights(scale, holder.weights.values())

    return data, reporters

def run_simulate_extremely_unfair_review_and_judge(times, unfair_reader_percent = 0.4):
    """Set up the population and run the extremely-unfair simulation.

    ``unfair_reader_percent`` is the fraction of readers chosen to be unfair.
    The simulation's return value is not needed here and is discarded.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    _data, _reporters = simulate_extremely_unfair_review_and_judge(
        times = times,
        scale = scale,
        readers = readers,
        unfair_scores = get_unfair_scores(),
        unfair_readers = get_unfair_readers(readers, unfair_reader_percent),
        reporters = reporters,
        target_reporters = get_target_reporters(reporters),
    )
    
run_simulate_extremely_unfair_review_and_judge(10)
8 out of 20 unfair readers with frac 0.40: [0, 1, 4, 8, 13, 14, 15, 19]

不公正讀者比例對報導者可信度的影響

In [22]:
import matplotlib.ticker as ticker
%run src/scale.py
%run src/helper.py
%run src/reader_weight_holder.py

def simulate_unfair_reader_percents(times, unfair_reader_percents):
    """Measure reporter 0's estimated credibility for several unfair-reader ratios.

    For each ratio the extremely-unfair simulation is run for ``times``
    rounds against the same readers and reporters, with a freshly sampled
    set of unfair readers.

    Args:
        times: number of rounds per simulation.
        unfair_reader_percents: iterable of fractions of unfair readers.

    Returns:
        ``(results, scale)`` where ``results`` has one row per ratio with the
        columns ``percent``, ``simple_avg`` (plain mean of the reviews),
        ``weight_avg`` (mean weighted by reviewer weight) and
        ``reporter_score`` (reporter 0's true score), and ``scale`` is the
        ``Scale`` instance that was used.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    for percent in unfair_reader_percents:

        properties = {
            'times' : times,
            'scale' : scale,
            'readers' : readers,
            'unfair_scores' : get_unfair_scores(),
            'unfair_readers' : get_unfair_readers(readers, percent),
            'reporters' : reporters,
            'target_reporters' : get_target_reporters(reporters),
            'drawing' : False
        }

        data, reporters = simulate_extremely_unfair_review_and_judge(**properties)

        # Only the targeted reporter (id 0) is of interest.
        data = data[data.reporter_id == 0]

        # Plain average of the review scores.
        reporter_scores = get_reporter_score(data)

        # Average of the review scores weighted by reviewer credibility.
        reporter_scores_weighted = get_reporter_score(data, weighted = True)

        rows.append({
            'percent': percent,
            'simple_avg': reporter_scores.loc[0],
            'weight_avg': reporter_scores_weighted.loc[0],
            'reporter_score': reporters.iloc[0].reporter_score
        })

    results = pd.DataFrame(rows, columns = ['percent', 'simple_avg', 'weight_avg', 'reporter_score'])

    return results, scale

def run_simulate_unfair_reader_percents():
    """Plot reporter 0's plain, weighted and true score against the unfair-reader ratio."""
    ratios = [0.0, 0.1, 0.2, 0.3, 0.4, 0.6, 0.8]
    results, scale = simulate_unfair_reader_percents(10, ratios)
    results = results.set_index('percent')

    # The two estimates first, then the true score as a dashed grey reference line.
    for column, label in (('simple_avg', 'simple avg'), ('weight_avg', 'weighted avg')):
        plt.plot(results[column], label = label)
    plt.plot(results['reporter_score'], 'k--', label = 'origin', c = '0.55')

    # Show the x axis as percentages and the y axis over the whole scale.
    axis = plt.gca()
    axis.xaxis.set_major_formatter(ticker.PercentFormatter(xmax = 1))
    plt.ylim(scale.min, scale.max)
    plt.xlabel("Percents")
    plt.ylabel("Reporters[0]'s scores")
    plt.title("Different percents of unfair readers")
    plt.legend(loc = 'upper left')
    plt.show()

run_simulate_unfair_reader_percents()
0 out of 20 unfair readers with frac 0.00: []
2 out of 20 unfair readers with frac 0.10: [5, 8]
4 out of 20 unfair readers with frac 0.20: [6, 12, 15, 18]
6 out of 20 unfair readers with frac 0.30: [1, 4, 5, 10, 12, 13]
8 out of 20 unfair readers with frac 0.40: [2, 6, 10, 12, 13, 15, 16, 18]
12 out of 20 unfair readers with frac 0.60: [0, 1, 3, 6, 8, 10, 11, 12, 14, 16, 18, 19]
16 out of 20 unfair readers with frac 0.80: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 15, 16, 17, 18]

次數增加能否讓加權平均後的可信度接近真實可信度

In [ ]:
def simulate_unfair_reader_times(times, unfair_reader_percent = 0.4):
    """Measure reporter 0's estimated credibility for several round counts.

    For each entry of ``times`` the extremely-unfair simulation is run from
    scratch for that many rounds against the same readers and reporters,
    with a freshly sampled set of unfair readers.

    Args:
        times: iterable of round counts to try.
        unfair_reader_percent: fraction of readers chosen to be unfair.

    Returns:
        ``(results, scale)`` where ``results`` has one row per round count
        with the columns ``time``, ``simple_avg`` (plain mean of the
        reviews), ``weight_avg`` (mean weighted by reviewer weight) and
        ``reporter_score`` (reporter 0's true score), and ``scale`` is the
        ``Scale`` instance that was used.
    """
    scale = Scale()
    readers = get_readers(num['reader'])
    reporters = get_reporters(num['reporter'], scale)

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0.
    rows = []

    for rounds in times:

        properties = {
            'times' : rounds,
            'scale' : scale,
            'readers' : readers,
            'unfair_scores' : get_unfair_scores(),
            'unfair_readers' : get_unfair_readers(readers, unfair_reader_percent),
            'reporters' : reporters,
            'target_reporters' : get_target_reporters(reporters),
            'drawing' : False
        }

        data, reporters = simulate_extremely_unfair_review_and_judge(**properties)

        # Only the targeted reporter (id 0) is of interest.
        data = data[data.reporter_id == 0]

        # Plain average of the review scores.
        reporter_scores = get_reporter_score(data)

        # Average of the review scores weighted by reviewer credibility.
        reporter_scores_weighted = get_reporter_score(data, weighted = True)

        rows.append({
            'time': rounds,
            'simple_avg': reporter_scores.loc[0],
            'weight_avg': reporter_scores_weighted.loc[0],
            'reporter_score': reporters.iloc[0].reporter_score
        })

    results = pd.DataFrame(rows, columns = ['time', 'simple_avg', 'weight_avg', 'reporter_score'])

    return results, scale
    
def run_simulate_unfair_reader_times():
    """Plot reporter 0's plain, weighted and true score against the number of rounds."""
    round_counts = [5, 10, 20, 30, 50]
    results, scale = simulate_unfair_reader_times(round_counts)
    results = results.set_index('time')

    # The two estimates first, then the true score as a dashed grey reference line.
    for column, label in (('simple_avg', 'simple avg'), ('weight_avg', 'weighted avg')):
        plt.plot(results[column], label = label)
    plt.plot(results['reporter_score'], 'k--', label = 'origin', c = '0.55')

    plt.ylabel("Reporters[0]'s scores")
    plt.title("Different times of simulates")
    plt.ylim(scale.min, scale.max)
    plt.legend(loc = 'upper left')
    plt.show()
    
run_simulate_unfair_reader_times()
8 out of 20 unfair readers with frac 0.40: [8, 10, 11, 12, 13, 14, 15, 16]
8 out of 20 unfair readers with frac 0.40: [2, 5, 6, 12, 13, 15, 17, 18]