extern crate rand; extern crate rand_chacha; extern crate rcflib; use num::abs; /// try cargo test --release /// these tests are designed to be longish use rand::{prelude::ThreadRng, Rng, SeedableRng}; use rand_chacha::ChaCha20Rng; use rand_core::RngCore; use rcflib::{ common::{multidimdatawithkey::MultiDimDataWithKey, samplesummary::summarize}, l1distance, l2distance, linfinitydistance, }; use rcflib::common::cluster::{Center, multi_cluster_as_object_with_weight_array, multi_cluster_as_ref, multi_cluster_as_weighted_ref, multi_cluster_obj, persist, single_centroid_cluster_slice_with_weight_arrays, single_centroid_cluster_weighted_vec, single_centroid_cluster_weighted_vec_with_distance_over_slices, single_centroid_unweighted_cluster_slice}; use rcflib::errors::RCFError; fn gen_data(data_size:usize, test_dimension:usize, seed:u64,yard_stick : f32) -> MultiDimDataWithKey { let mut mean = Vec::new(); let mut scale = Vec::new(); for i in 0..test_dimension { let mut vec1 = vec![0.0f32; test_dimension]; let mut vec2 = vec![0.0f32; test_dimension]; vec1[i] = 2.0 * yard_stick; vec2[i] = -2.0 * yard_stick; mean.push(vec1); mean.push(vec2); scale.push(vec![0.1f32; test_dimension]); scale.push(vec![0.1f32; test_dimension]); } MultiDimDataWithKey::mixture( data_size, &mean, &scale, &vec![0.5 / test_dimension as f32; 2 * test_dimension], seed, ) } fn test_center(result: &mut Vec
,test_dimension:usize, yard_stick : f32) -> bool { let mut answer = true; for i in 0..test_dimension { result.sort_by(|a, b| a.representative()[i].partial_cmp(&b.representative()[i]).unwrap()); answer = answer && abs(result[0].representative()[i] + 2.0 * yard_stick) < 0.2; answer = answer && abs(result[2 * test_dimension - 1].representative()[i] - 2.0 * yard_stick) < 0.2; for j in 1..(2 * test_dimension - 1) { answer = answer && abs(result[j].representative()[i]) < 0.2; } } answer } fn bad_distance(a : &T, b:&T) -> f64{ -1.0 } #[test] fn test_config() { let test_dimension = 3; let yard_stick = l1distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(1000,test_dimension,0u64,yard_stick); let mut input = Vec::new(); for i in 0..data_with_key.data.len() { input.push((data_with_key.data[i].clone(), 1.0f32)); } let mut result = single_centroid_cluster_weighted_vec_with_distance_over_slices(&input, bad_distance, 2 * test_dimension + 3, false); match &result { Ok(x) => assert!(false), Err(y) => assert!(true), }; let mut result = single_centroid_cluster_weighted_vec_with_distance_over_slices(&input, l2distance, 0, false); match &result { Ok(x) => assert!(false), Err(y) => assert!(true), }; let mut result = single_centroid_cluster_weighted_vec_with_distance_over_slices(&input, l2distance, 200, false); match &result { Ok(x) => assert!(false), Err(y) => assert!(true), }; let mut result = single_centroid_cluster_weighted_vec_with_distance_over_slices(&input, l2distance, 20, false); match &result { Ok(x) => assert!(true), Err(y) => assert!(false), }; } fn core( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input = Vec::new(); for i in 0..data_with_key.data.len() { input.push((data_with_key.data[i].clone(), 1.0f32)); } let mut result = single_centroid_cluster_weighted_vec_with_distance_over_slices(&input, distance, 2 * test_dimension + 3, false).unwrap(); let answer = (result.len() == 2 * test_dimension) && test_center(&mut result,test_dimension,yard_stick); println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_cluster() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (core(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn core_as_slice_uniform( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input:Vec<&[f32]> = Vec::new(); for i in 0..data_with_key.data.len() { input.push(&data_with_key.data[i]); } let mut result = single_centroid_unweighted_cluster_slice(&input, distance, 2 * test_dimension + 3, false).unwrap(); let answer = (result.len() == 2 * test_dimension) && test_center(&mut result,test_dimension,yard_stick); println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_slice_uniform() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (core_as_slice_uniform(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn core_as_slice_weighted( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input:Vec<&[f32]> = Vec::new(); for i in 0..data_with_key.data.len() { input.push(&data_with_key.data[i]); } let weights = vec![1.0f32;data_with_key.data.len()]; let mut result = single_centroid_cluster_slice_with_weight_arrays(&input, &weights, distance, 2 * test_dimension + 3, false).unwrap(); let answer = (result.len() == 2 * test_dimension) && test_center(&mut result,test_dimension,yard_stick); println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_slice_weighted() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (core_as_slice_weighted(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn vec_dist(a: &Vec, b: &Vec) -> f64 { l1distance(&a,&b) } fn core_vec( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&Vec, &Vec) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input = Vec::new(); for i in 0..data_with_key.data.len() { input.push((data_with_key.data[i].clone(),1.0f32)); } let mut result = single_centroid_cluster_weighted_vec(&input, distance, 2 * test_dimension + 3, false).unwrap(); let answer = (result.len() == 2 * test_dimension) && test_center(&mut result,test_dimension,yard_stick); println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_vec() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (core_vec(200000, d, seed, vec_dist) == false) as i32; } assert!(error < 5); } fn multi_as_vec( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input:Vec> = Vec::new(); for i in 0..data_with_key.data.len() { input.push(data_with_key.data[i].clone()); } let mut result = multi_cluster_obj(&input, vec_dist, 5,0.1,true,2 * test_dimension + 3, false).unwrap(); let mut answer = (result.len() == 2 * test_dimension); for i in 0..test_dimension { result.sort_by(|a, b| a.representatives()[0].0[i].partial_cmp(&b.representatives()[0].0[i]).unwrap()); answer = answer && abs(result[0].representatives()[0].0[i] + 2.0 * yard_stick) < 0.5; answer = answer && abs(result[2 * test_dimension - 1].representatives()[0].0[i] - 2.0 * yard_stick) < 0.5; for j in 1..(2 * test_dimension - 1) { answer = answer && abs(result[j].representatives()[0].0[i]) < 0.5; } } println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_multi_vec() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (multi_as_vec(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn multi_as_ref( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input = Vec::new(); for i in 0..data_with_key.data.len() { input.push(&data_with_key.data[i]); } let mut result = multi_cluster_as_ref(&input, vec_dist, 5,0.1,true,2 * test_dimension + 3, false).unwrap(); let mut answer = (result.len() == 2 * test_dimension); for i in 0..test_dimension { result.sort_by(|a, b| a.representatives()[0].0[i].partial_cmp(&b.representatives()[0].0[i]).unwrap()); answer = answer && abs(result[0].representatives()[0].0[i] + 2.0 * yard_stick) < 0.5; answer = answer && abs(result[2 * test_dimension - 1].representatives()[0].0[i] - 2.0 * yard_stick) < 0.5; for j in 1..(2 * test_dimension - 1) { answer = answer && abs(result[j].representatives()[0].0[i]) < 0.5; } } println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_multi_ref() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (multi_as_ref(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn multi_as_weighted_ref( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input = Vec::new(); for i in 0..data_with_key.data.len() { input.push((&data_with_key.data[i],1.0f32)); } let mut result = multi_cluster_as_weighted_ref(&input, vec_dist, 5,0.1,true,2 * test_dimension + 3, false).unwrap(); let mut answer = (result.len() == 2 * test_dimension); for i in 0..test_dimension { result.sort_by(|a, b| a.representatives()[0].0[i].partial_cmp(&b.representatives()[0].0[i]).unwrap()); answer = answer && abs(result[0].representatives()[0].0[i] + 2.0 * yard_stick) < 0.5; answer = answer && abs(result[2 * test_dimension - 1].representatives()[0].0[i] - 2.0 * yard_stick) < 0.5; for j in 1..(2 * test_dimension - 1) { answer = answer && abs(result[j].representatives()[0].0[i]) < 0.5; } } println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_multi_weighted_ref() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (multi_as_weighted_ref(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); } fn multi_as_vec_weighted( data_size: usize, test_dimension: usize, seed: u64, distance: fn(&[f32], &[f32]) -> f64, ) -> bool { println!(" starting {}",test_dimension); let yard_stick = distance(&vec![0.0; test_dimension], &vec![1.0; test_dimension]) as f32; let data_with_key = gen_data(data_size,test_dimension,seed,yard_stick); let mut input:Vec> = Vec::new(); for i in 0..data_with_key.data.len() { input.push(data_with_key.data[i].clone()); } let weights = vec![1.0f32;data_with_key.data.len()]; let mut ref_result = multi_cluster_as_object_with_weight_array(&input, &weights, vec_dist, 5,0.1,true,2 * test_dimension + 3, false).unwrap(); let mut result = persist(&ref_result); let mut answer = (result.len() == 2 * test_dimension); for i in 0..test_dimension { result.sort_by(|a, b| a.representatives()[0].0[i].partial_cmp(&b.representatives()[0].0[i]).unwrap()); answer = answer && abs(result[0].representatives()[0].0[i] + 2.0 * yard_stick) < 0.5; answer = answer && abs(result[2 * test_dimension - 1].representatives()[0].0[i] - 2.0 * yard_stick) < 0.5; for j in 1..(2 * test_dimension - 1) { answer = answer && abs(result[j].representatives()[0].0[i]) < 0.5; } } println!(" done {} {}",test_dimension,answer); answer } #[test] fn benchmark_multi_vec_weighted() { let mut generator = ThreadRng::default(); let one_seed: u64 = generator.gen(); println!(" single seed is {}", one_seed); let mut rng = ChaCha20Rng::seed_from_u64(one_seed); let mut error = 0; for _ in 0..10 { let seed = rng.next_u64(); let d = rng.gen_range(3..23); error += (multi_as_vec_weighted(200000, d, seed, l1distance) == false) as i32; } assert!(error < 5); }