Hướng dẫn hypothesis testing in python datacamp github - kiểm tra giả thuyết trong python datacamp github

#----------------------------------------------------------------------------------# #Chapter 3 - Introduction to hypothesis testing #----------------------------------------------------------------------------------# #Generating a permutation sample def permutation_sample(data1, data2): """Generate a permutation sample from two data sets.""" # Concatenate the data sets: data data = np.concatenate((data1, data2)) # Permute the concatenated array: permuted_data permuted_data = np.random.permutation(data) # Split the permuted array into two: perm_sample_1, perm_sample_2 perm_sample_1 = permuted_data[:len(data1)] perm_sample_2 = permuted_data[len(data1):] return perm_sample_1, perm_sample_2 #----------------------------------------------------------------------------------# #Visualizing permutation sampling for _ in range(50): # Generate permutation samples perm_sample_1, perm_sample_2 = permutation_sample( rain_july, rain_november) # Compute ECDFs x_1, y_1 = ecdf(perm_sample_1) x_2, y_2 = ecdf(perm_sample_2) # Plot ECDFs of permutation sample _ = plt.plot(x_1, y_1, marker='.', linestyle='none', color='red', alpha=0.02) _ = plt.plot(x_2, y_2, marker='.', linestyle='none', color='blue', alpha=0.02) # Create and plot ECDFs from original data x_1, y_1 = ecdf(rain_july) x_2, y_2 = ecdf(rain_november) _ = plt.plot(x_1, y_1, marker='.', linestyle='none', color='red') _ = plt.plot(x_2, y_2, marker='.', linestyle='none', color='blue') # Label axes, set margin, and show plot plt.margins(0.02) _ = plt.xlabel('monthly rainfall (mm)') _ = plt.ylabel('ECDF') plt.show() #----------------------------------------------------------------------------------# #Generating permutation replicates def draw_perm_reps(data_1, data_2, func, size=1): """Generate multiple permutation replicates.""" # Initialize array of replicates: perm_replicates perm_replicates = np.empty(size) for i in range(size): # Generate permutation sample perm_sample_1, perm_sample_2 = permutation_sample(data_1, data_2) # Compute the test statistic perm_replicates[i] = func(perm_sample_1, perm_sample_2) return perm_replicates #----------------------------------------------------------------------------------# #Look before you leap: EDA before hypothesis testing # Make bee swarm plot _ = sns.swarmplot(x='ID', y='impact_force', data=df) # Label axes _ = plt.xlabel('frog') _ = plt.ylabel('impact force (N)') # Show the plot plt.show() #----------------------------------------------------------------------------------# #Permutation test on frog data def diff_of_means(data_1, data_2): """Difference in means of two arrays.""" # The difference of means of data_1, data_2: diff diff = np.mean(data_1) - np.mean(data_2) return diff # Compute difference of mean impact force from experiment: empirical_diff_means empirical_diff_means = diff_of_means(force_a, force_b) # Draw 10,000 permutation replicates: perm_replicates perm_replicates = draw_perm_reps(force_a, force_b, diff_of_means, size=10000) # Compute p-value: p p = np.sum(perm_replicates >= empirical_diff_means) / len(perm_replicates) # Print the result print('p-value =', p) #----------------------------------------------------------------------------------# #A one-sample bootstrap hypothesis test # Make an array of translated impact forces: translated_force_b translated_force_b = force_b - np.mean(force_b) + 0.55 # Take bootstrap replicates of Frog B's translated impact forces: bs_replicates bs_replicates = draw_bs_reps(translated_force_b, np.mean, 10000) # Compute fraction of replicates that are less than the observed Frog B force: p p = np.sum(bs_replicates <= np.mean(force_b)) / 10000 # Print the p-value print('p = ', p) #----------------------------------------------------------------------------------# #A bootstrap test for identical distributions # Compute difference of mean impact force from experiment: empirical_diff_means empirical_diff_means = diff_of_means(force_a, force_b) # Concatenate forces: forces_concat forces_concat = np.concatenate((force_a, force_b)) # Initialize bootstrap replicates: bs_replicates bs_replicates = np.empty(10000) for i in range(10000): # Generate bootstrap sample bs_sample = np.random.choice(forces_concat, size=len(forces_concat)) # Compute replicate bs_replicates[i] = diff_of_means(bs_sample[:len(force_a)], bs_sample[len(force_a):]) # Compute and print p-value: p p = np.sum(bs_replicates >= empirical_diff_means) / len(bs_replicates) print('p-value =', p) #----------------------------------------------------------------------------------# #A two-sample bootstrap hypothesis test for difference of means. # Compute mean of all forces: mean_force mean_force = np.mean(forces_concat) # Generate shifted arrays force_a_shifted = force_a - np.mean(force_a) + mean_force force_b_shifted = force_b - np.mean(force_b) + mean_force # Compute 10,000 bootstrap replicates from shifted arrays bs_replicates_a = draw_bs_reps(force_a_shifted, np.mean, size=10000) bs_replicates_b = draw_bs_reps(force_b_shifted, np.mean, size=10000) # Get replicates of difference of means: bs_replicates bs_replicates = bs_replicates_a - bs_replicates_b # Compute and print p-value: p p = np.sum(bs_replicates >= empirical_diff_means) / len(bs_replicates) print('p-value =', p) #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------# #----------------------------------------------------------------------------------#