use chrono::{Days, Local, NaiveDate, NaiveTime, Timelike};
use rayon::iter::{
    IntoParallelIterator, IntoParallelRefIterator, ParallelBridge, ParallelExtend, ParallelIterator,
};
use regex::Regex;
use serde::Deserialize;
use std::collections::{HashMap, HashSet};
use std::error::Error;
use std::fs::{self, File};
use std::io::{BufRead, BufReader, ErrorKind};
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Mutex, OnceLock};
use strsim::levenshtein;

/// One row of `route_sub_agencies.txt`, deserialized by `main` from
/// `gtfs/PID_GTFS/route_sub_agencies.txt`.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct RouteSubAgencies {
    route_id: String,
    /// Licence number of the route; `main` collects every value that is present.
    route_licence_number: Option<i32>,
    sub_agency_id: i32,
    sub_agency_name: String,
}

/// One row of an `agency.txt` file, as read by `get_agency_name_from_folder`.
///
/// Only the columns listed as fields are deserialized; the commented-out ones are ignored.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct Agency {
    agency_id: String,
    agency_name: String,
    agency_url: String,
    agency_timezone: String,
    // agency_lang: String,
    // agency_phone: String,
    // agency_fare_url: String,
    // agency_email: String,
}

/// One row of a `routes.txt` file, as read by `get_route_long_name_from_folder` and
/// `function_idk`.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct Routes {
    route_id: String,
    agency_id: String,
    route_short_name: String,
    route_long_name: String,
    /// Numeric route type; `function_idk` only considers a fixed set of values.
    route_type: i32,
    // route_color: String,
    // route_text_color: String,
}

/// One row of a `stop_times.txt` file, as read by `get_stop_times_from_folder` and
/// `get_stop_names_from_folder`.
#[derive(Debug, Deserialize, Clone, Default)]
#[allow(dead_code)]
struct StopTimes {
    trip_id: String,
    /// Kept as the raw text from the file.
    arrival_time: String,
    /// Kept as the raw text from the file.
    departure_time: String,
    stop_id: String,
    /// Position of the stop within its trip; `get_stop_times_from_folder` renumbers it
    /// from zero for every trip.
    stop_sequence: u32,
    // stop_headsign: String,
    // pickup_type: String,
    // drop_off_type: String,
    // shape_dist_traveled: i32,
    // timepoint: String,
    // stop_zone_ids: String,
}

/// Row type with the columns of a trips table.
///
/// NOTE(review): not referenced in the visible part of this file; presumably one row of a
/// GTFS `trips.txt` — confirm against its users.
#[derive(Debug, Deserialize, Clone)]
#[allow(dead_code)]
struct Trips {
    route_id: String,
    service_id: String,
    trip_id: String,
    // trip_headsign: String,
    wheelchair_accessible: Option<i32>,
    // block_id: Option<i32>,
    // Deserialized as text rather than as the integer variant commented out above.
    block_id: Option<String>,
    direction_id: i32,
    // bikes_allowed: i32,
    // exceptional: i32,
}

/// One row of a `stops.txt` file, as read by `get_stop_names_from_folder`.
///
/// Every column is kept as raw text.
#[derive(Debug, Deserialize, Clone, Default)]
#[allow(dead_code)]
struct Stops {
    stop_id: String,
    // stop_code: String,
    stop_name: String,
    // stop_desc: String,
    stop_lat: String,
    stop_lon: String,
    zone_id: String,
    // stop_url: String,
    location_type: String,
    // parent_station: String,
    // stop_timezone: String,
    wheelchair_boarding: String,
    // platform_code: String,
}

/// Directory whose sub-directories are the timetable folders this tool groups, compares and
/// passes to `remove_folder`.
const SEARCH_DIR: &str = "/home/adam/projects/GTFS/";

/// Agency names, as found in a timetable folder's `agency.txt`, whose timetables `main`
/// expects to remove.
///
/// The entries are index-aligned with [`AGENCIES_TO_REMOVE_FOLDERS`]: `main` looks up a
/// position in that array and uses it to index this one. The URLs above each entry are
/// source links kept for reference.
const AGENCIES_TO_REMOVE: [&str; 5] = [
    // https://pid.cz/o-systemu/opendata/
    // https://data.pid.cz/PID_GTFS.zip
    "Dopravní podnik hl.m. Prahy, a.s.",
    // https://data.brno.cz/datasets/j%C3%ADzdn%C3%AD-%C5%99%C3%A1d-ids-jmk-ve-form%C3%A1tu-gtfs-gtfs-timetable-data/about
    // https://www.arcgis.com/sharing/rest/content/items/379d2e9a7907460c8ca7fda1f3e84328/data
    "Dopravní podnik města Brna, a.s.",
    // "IDS JMK (Data from: KORDIS JMK, DPMB)",
    // https://www.dpmlj.cz/opendata
    // https://www.dpmlj.cz/gtfs.zip
    "Dopravní podnik měst Liberce a Jablonce nad Nisou, a.s.",
    // https://www.dpmo.cz/informace-pro-cestujici/jizdni-rady/jizdni-rady-gtfs/
    // https://www.dpmo.cz/doc/dpmo-olomouc-cz.zip
    "Dopravní podnik města Olomouce, a.s.",
    // https://opendata.plzen.eu/public/opendata/detail/187
    // https://jizdnirady.pmdp.cz/jr/gtfs
    "Plzeňské městské dopravní podniky, a.s.",
];

/// Agency names expected in the `agency.txt` of the feed folders under `gtfs/`.
///
/// `main` asserts that every folder under `gtfs/` reports one of these names and that the
/// number of folders equals the length of this array. The entries are index-aligned with
/// [`AGENCIES_TO_REMOVE`]; the first two differ because those feeds are published under a
/// different name than the agency used in the timetable folders.
const AGENCIES_TO_REMOVE_FOLDERS: [&str; 5] = [
    // https://pid.cz/o-systemu/opendata/
    // https://data.pid.cz/PID_GTFS.zip
    // "Dopravní podnik hl.m. Prahy, a.s.",
    "Pražská integrovaná doprava",
    // https://data.brno.cz/datasets/j%C3%ADzdn%C3%AD-%C5%99%C3%A1d-ids-jmk-ve-form%C3%A1tu-gtfs-gtfs-timetable-data/about
    // https://www.arcgis.com/sharing/rest/content/items/379d2e9a7907460c8ca7fda1f3e84328/data
    // "Dopravní podnik města Brna, a.s.",
    "IDS JMK (Data from: KORDIS JMK, DPMB)",
    // https://www.dpmlj.cz/opendata
    // https://www.dpmlj.cz/gtfs.zip
    "Dopravní podnik měst Liberce a Jablonce nad Nisou, a.s.",
    // https://www.dpmo.cz/informace-pro-cestujici/jizdni-rady/jizdni-rady-gtfs/
    // https://www.dpmo.cz/doc/dpmo-olomouc-cz.zip?
    "Dopravní podnik města Olomouce, a.s.",
    // https://opendata.plzen.eu/public/opendata/detail/187
    // https://jizdnirady.pmdp.cz/jr/gtfs
    "Plzeňské městské dopravní podniky, a.s.",
];

/// Dry-run switch.
///
/// NOTE(review): not referenced in the visible part of this file; presumably consulted by
/// `remove_folder` to skip the actual deletion — confirm there.
const DRY_RUN: bool = true;

fn main() -> Result<(), Box<dyn Error>> {
    let grouped_dirs = group_directories_by_routes()?;

    let grouped_grouped_dirs = group_grouped_dirs_by_start_date(&grouped_dirs);

    let mut rdr = csv::Reader::from_path("gtfs/PID_GTFS/route_sub_agencies.txt").unwrap();
    let mut transport_lines_in_pid = Vec::new();

    for ii in rdr.deserialize() {
        let record: RouteSubAgencies = ii.unwrap();

        if let Some(v) = record.route_licence_number {
            transport_lines_in_pid.push(v);
        }
    }

    let mut agencies_lines_and_stoptimes = Vec::new();
    for folder in fs::read_dir("gtfs").unwrap() {
        let folder = folder.unwrap();

        let abc = function_idk(&folder.path());
        let agency_name = get_agency_name_from_folder(&folder.path());

        agencies_lines_and_stoptimes.push((agency_name.clone(), abc));

        assert!(
            AGENCIES_TO_REMOVE_FOLDERS.contains(&agency_name.as_str()),
            "{:#?} {:#?}",
            dbg!(AGENCIES_TO_REMOVE_FOLDERS),
            dbg!(agency_name)
        );
    }
    assert!(AGENCIES_TO_REMOVE_FOLDERS.len() == agencies_lines_and_stoptimes.len());

    let count_of_timetable_groups_with_same_start_date = AtomicUsize::new(0);
    let count_of_timetables_with_same_start_date = AtomicUsize::new(0);
    let count_timetables_deleted = AtomicUsize::new(0);
    let count_timetables_deleted_duplicates = AtomicUsize::new(0);
    let count_timetables_deleted_outdated = AtomicUsize::new(0);
    let count_timetables_deleted_empty = AtomicUsize::new(0);
    let count_timetables_deleted_pid = AtomicUsize::new(0);
    let count_timetables_deleted_idk = AtomicUsize::new(0);
    let count_timetables_deleted_hashmap = Mutex::new(HashMap::new());
    let count_timetables_deleted_hashmap_ = Mutex::new(HashMap::new());
    for i in AGENCIES_TO_REMOVE {
        count_timetables_deleted_hashmap
            .lock()
            .unwrap()
            .insert(i.to_owned(), 0);

        count_timetables_deleted_hashmap_
            .lock()
            .unwrap()
            .insert(i.to_owned(), 0);
    }
    println!("{}", grouped_grouped_dirs.0.len());
    grouped_grouped_dirs.0.into_par_iter().for_each(
        |(mut group, count_of_start_dates_in_the_future)| {
            let mut test_bool = (Vec::new(), Vec::new(), Vec::new());
            for i in &group {
                if i.len() <= 1 {
                    continue;
                }
                count_of_timetable_groups_with_same_start_date.fetch_add(1, Ordering::Relaxed);
                count_of_timetables_with_same_start_date.fetch_add(i.len(), Ordering::Relaxed);
            }

            for i in
                0..(group.len() - 1).saturating_sub(count_of_start_dates_in_the_future as usize)
            {
                for ii in &group[i] {
                    println!("deleted__: {:?}", Path::new(SEARCH_DIR).join(ii));
                    remove_folder(Path::new(SEARCH_DIR).join(ii));
                    count_timetables_deleted.fetch_add(1, Ordering::Relaxed);
                    count_timetables_deleted_outdated.fetch_add(1, Ordering::Relaxed);
                    test_bool.0.push(ii.clone());
                }

                group[i] = Vec::new();
            }

            for ii in &mut group {
                let mut indexes_to_delete = Vec::new();
                let mut test = false;
                let mut test_ = 0;
                for i in 0..ii.len() {
                    for j in (i + 1)..ii.len() {
                        if !are_two_folders_identical(ii, i, j) {
                            continue;
                        }
                        if ii[i] == "7202" {
                            test = true;
                            test_ = i;
                        }
                        println!("deleted_: {:?}", Path::new(SEARCH_DIR).join(ii[i].clone()));
                        remove_folder(Path::new(SEARCH_DIR).join(ii[i].clone()));
                        count_timetables_deleted.fetch_add(1, Ordering::Relaxed);
                        count_timetables_deleted_duplicates.fetch_add(1, Ordering::Relaxed);
                        test_bool.1.push(ii[i].clone());
                        indexes_to_delete.push(i);
                        break;
                    }
                }
                if test {
                    assert!(indexes_to_delete.contains(&test_));
                }
                for i in indexes_to_delete.iter().rev() {
                    let len_tmp = ii.len();
                    assert!(ii.len()+1 == len_tmp);
                }
            }

            for i in &group {
                for ii in i {

                    count_timetables_deleted_idk.fetch_add(1, Ordering::Relaxed);
                    let path = Path::new(SEARCH_DIR).join(&ii);
                    if is_file_empty(&path.join("stop_times.txt")) {
                        assert!(is_file_empty(
                            &path.join("calendar.txt")
                        ));
                        assert!(is_file_empty(
                            &path.join("calendar_dates.txt")
                        ));
                        assert!(is_file_empty(
                            &path.join("trips.txt")
                        ));
                        assert!(is_file_empty(
                            &path.join("frequencies.txt")
                        ));

                        panic!("{path:#?}");

                    } else {
                        let mut is_pid = false;
                        if transport_lines_in_pid
                            .contains(&get_line_from_folder(&path))
                        {
                            count_timetables_deleted_pid.fetch_add(1, Ordering::Relaxed);
                            is_pid = true;
                        }
                        let mut is_something = String::new();
                        if AGENCIES_TO_REMOVE.contains(
                            &get_agency_name_from_folder(&path).as_str(),
                        ) {
                            if get_agency_name_from_folder(&path) == AGENCIES_TO_REMOVE[0]  {
                                assert!(is_pid || get_line_from_folder(&path) == 199042, "{} {}", get_agency_name_from_folder(&path), get_line_from_folder(&path));
                            }
                            is_something = get_agency_name_from_folder(&path);

                            count_timetables_deleted_hashmap_
                                .lock()
                                .unwrap()
                                .entry(
                                    get_agency_name_from_folder(&path)
                                        .clone(),
                                )
                                .and_modify(|x| *x += 1);
                        }

                        let mut lines_to_remove = Vec::new();
                        for agency in &agencies_lines_and_stoptimes {
                            lines_to_remove.push((
                                agency.0.clone(),
                                function_idk_2(&ii, &agency.1, &agency.0),
                            ));
                        }

                        assert!(lines_to_remove.iter().map(|x| x.1.len()).sum::<usize>() <= 1);
                        assert!(!has_duplicates(&lines_to_remove));

                        if lines_to_remove.iter().find(|x| x.0 == AGENCIES_TO_REMOVE_FOLDERS[0]).unwrap().1.len() == 1 {
                           assert!(is_pid, "{lines_to_remove:#?} {AGENCIES_TO_REMOVE:#?} {AGENCIES_TO_REMOVE_FOLDERS:#?}");
                        }

                        let mut agency_name_match = "";
                        for line in &lines_to_remove {

                            count_timetables_deleted_hashmap
                                .lock()
                                .unwrap()
                                .get(
                                    AGENCIES_TO_REMOVE[AGENCIES_TO_REMOVE_FOLDERS
                                        .iter()
                                        .position(|x| *x == line.0)
                                        .unwrap()],
                                )
                                .unwrap();
                            count_timetables_deleted_hashmap
                                .lock()
                                .unwrap()
                                .entry(
                                    AGENCIES_TO_REMOVE[AGENCIES_TO_REMOVE_FOLDERS
                                        .iter()
                                        .position(|x| *x == line.0)
                                        .unwrap()]
                                    .to_owned(),
                                )
                                .and_modify(|x| *x += line.1.len());
                            // println!("Time elapsed: {:?}", start.elapsed());

                            if line.1.len() == 1 {

                            agency_name_match = AGENCIES_TO_REMOVE[AGENCIES_TO_REMOVE_FOLDERS
                                        .iter()
                                        .position(|x| *x == line.0)
                                        .unwrap()];

                            }

                            assert!(line.1.len() == 0 || line.1.len() == 1);
                        }

                        if !is_something.is_empty() {
                            if is_something != agency_name_match {
                                println!("xyz {is_something} {agency_name_match} {} {ii}", get_line_from_folder(&path));
                                assert!(agency_name_match.is_empty());
                            }
                        }

                        if is_pid || !agency_name_match.is_empty() {
                            remove_folder(path);
                            count_timetables_deleted.fetch_add(1, Ordering::Relaxed);
                            test_bool.2.push(ii);
                        }
                    }
                }
            }

            let mut test_bool__ = HashSet::new();

            for i in &test_bool.0 {
                assert!(test_bool__.insert(i.to_string()));
            }
            for i in &test_bool.1 {
                assert!(test_bool__.insert(i.to_string()));
            }
            for i in &test_bool.2 {
                assert!(test_bool__.insert(i.to_string()), "{:#?}", test_bool);
            }
        },
    );

    for i in grouped_grouped_dirs.1 {
        if is_file_empty(&Path::new(SEARCH_DIR).join(&i).join("stop_times.txt")) {
            assert!(is_file_empty(
                &Path::new(SEARCH_DIR).join(&i).join("calendar.txt")
            ));
            assert!(is_file_empty(
                &Path::new(SEARCH_DIR).join(&i).join("calendar_dates.txt")
            ));
            assert!(is_file_empty(
                &Path::new(SEARCH_DIR).join(&i).join("trips.txt")
            ));
            assert!(is_file_empty(
                &Path::new(SEARCH_DIR).join(&i).join("frequencies.txt")
            ));

            // remove_folder(Path::new(search_dir).join(i)).unwrap();

            count_timetables_deleted.fetch_add(1, Ordering::Relaxed);
            count_timetables_deleted_empty.fetch_add(1, Ordering::Relaxed);
        } else {
            panic!();
        }
    }

    dbg!(count_timetables_deleted);
    dbg!(count_timetables_deleted_outdated);
    dbg!(count_timetables_deleted_duplicates);
    dbg!(count_timetables_deleted_empty);
    dbg!(count_timetables_deleted_pid);
    dbg!(count_timetables_deleted_idk);
    dbg!(count_of_timetables_with_same_start_date);
    dbg!(count_of_timetable_groups_with_same_start_date);
    dbg!(count_timetables_deleted_hashmap_);
    dbg!(count_timetables_deleted_hashmap);

    Ok(())
}

/// Heuristic distance between two route long names; smaller means more alike.
///
/// Both names are normalized with `normalize_route_long_name` and split into space-separated
/// tokens after a space has been inserted behind every `.`. The tokens of one name that have
/// a counterpart in the other name are then counted. If only the second name contains a `.`
/// its tokens are the ones that are walked, otherwise those of the first name.
///
/// Returns `1_000_000` when no token matches, `0` when at least three match, and the
/// Levenshtein distance of the normalized names otherwise.
fn route_long_names_distance(filter: &str, ii: &str) -> usize {
    /// Counts the tokens of `tokens` that also occur in `others`.
    ///
    /// A token containing `.` is treated as an abbreviation: it additionally matches any
    /// token of `others` that starts with the text before its final character. Tokens
    /// shorter than three characters are ignored unless they contain `.`.
    fn matching_tokens(tokens: &[&str], others: &[&str]) -> usize {
        tokens
            .iter()
            .copied()
            .filter(|token| {
                let abbreviated = token.contains('.');
                if !abbreviated && token.chars().count() < 3 {
                    return false;
                }
                (abbreviated
                    && others
                        .iter()
                        .any(|other| other.starts_with(&token[..token.len() - 1])))
                    || others.contains(token)
            })
            .count()
    }

    let first = normalize_route_long_name(filter);
    let second = normalize_route_long_name(ii);

    let first_spaced = first.replace('.', ". ");
    let second_spaced = second.replace('.', ". ");
    let first_tokens: Vec<&str> = first_spaced.split(' ').collect();
    let second_tokens: Vec<&str> = second_spaced.split(' ').collect();

    let walk_second = !first.contains('.') && second.contains('.');
    let matches = if walk_second {
        matching_tokens(&second_tokens, &first_tokens)
    } else {
        matching_tokens(&first_tokens, &second_tokens)
    };

    match matches {
        0 => 1_000_000,
        1 | 2 => levenshtein(&first, &second),
        _ => 0,
    }
}

/// Heuristic distance between two stop names; smaller means more alike.
///
/// Both names are normalized with `normalize_stop_name` and split on spaces. The tokens of
/// one name that have a counterpart in the other name are counted; if only the second name
/// contains a `.` its tokens are the ones that are walked, otherwise those of the first.
///
/// Returns `1_000_000` when no token matches, `0` when at least four match, `1` when at
/// least three match or when both names have the same number of tokens and that many
/// matched, and the Levenshtein distance of the normalized names otherwise.
fn stop_names_distance(filter: &str, ii: &str) -> usize {
    /// Counts the tokens of `tokens` that also occur in `others`.
    ///
    /// A token containing `.` is treated as an abbreviation: it additionally matches any
    /// token of `others` that starts with the text before its final character. Tokens
    /// shorter than three characters are ignored unless they contain `.`.
    fn matching_tokens(tokens: &[&str], others: &[&str]) -> usize {
        tokens
            .iter()
            .copied()
            .filter(|token| {
                let abbreviated = token.contains('.');
                if !abbreviated && token.chars().count() < 3 {
                    return false;
                }
                (abbreviated
                    && others
                        .iter()
                        .any(|other| other.starts_with(&token[..token.len() - 1])))
                    || others.contains(token)
            })
            .count()
    }

    let first = normalize_stop_name(filter);
    let second = normalize_stop_name(ii);

    let a_: Vec<&str> = first.split(' ').collect();
    let b_: Vec<&str> = second.split(' ').collect();

    let walk_second = !first.contains('.') && second.contains('.');
    let count = if walk_second {
        matching_tokens(&b_, &a_)
    } else {
        matching_tokens(&a_, &b_)
    };

    if count == 0 {
        return 1_000_000;
    }
    if count >= 4 {
        return 0;
    }
    let every_token_matched = a_.len() == b_.len() && a_.len() == count;
    if count >= 3 || every_token_matched {
        assert!(count >= 1, "{a_:#?} {b_:#?}");
        return 1;
    }

    levenshtein(&first, &second)
}

/// Returns the process-wide regex matching a run of one or more whitespace characters.
///
/// The regex is compiled on first use and reused afterwards.
fn regex_whitespace() -> &'static Regex {
    static WHITESPACE: OnceLock<Regex> = OnceLock::new();
    let compile = || Regex::new(r"\s+").unwrap();
    WHITESPACE.get_or_init(compile)
}

fn normalize_route_long_name(input: &str) -> String {
    let s = regex_whitespace().replace_all(input, " ").to_string();

    let s = s
        .replace([',', ':'], " ")
        .replace('–', "-")
        .replace('-', " - ");

    let s = regex_whitespace().replace_all(&s, " ").to_string();

    s.to_lowercase().trim().to_owned()
}

/// Brings a stop name into a canonical form for comparison.
///
/// Whitespace runs are collapsed, a bracketed tag (`[..]`) and decimal numbers are removed,
/// `,` becomes a space, a space is inserted after every `.`, and the result is lower-cased
/// and trimmed.
///
/// # Panics
///
/// Panics if the name contains more than one bracketed tag or a tag that is not exactly
/// four bytes long.
fn normalize_stop_name(input: &str) -> String {
    let collapsed = regex_whitespace().replace_all(input, " ");

    for (position, tag) in regex_brackets().find_iter(&collapsed).enumerate() {
        assert!(position == 0);
        assert!(tag.len() == 4);
    }

    let without_tags = regex_brackets().replace_all(&collapsed, "");
    let without_numbers = regex_number().replace_all(&without_tags, "");
    let spaced = without_numbers.replace(',', " ").replace('.', ". ");

    // The replacements above can introduce new runs of spaces, so collapse once more.
    regex_whitespace()
        .replace_all(&spaced, " ")
        .to_lowercase()
        .trim()
        .to_owned()
}

fn group_grouped_dirs_by_start_date(
    grouped_dirs: &Vec<Vec<String>>,
) -> (Vec<(Vec<Vec<String>>, u32)>, Vec<String>) {
    let today = Local::now()
        .date_naive()
        .checked_sub_days(Days::new(2))
        .unwrap();
    let mut grouped_grouped_dirs: Vec<(HashMap<String, Vec<String>>, u32)> = Vec::new();
    let mut timetables_without_start_date = Vec::new();
    for group in grouped_dirs {

        let mut content_map: HashMap<String, Vec<String>> = HashMap::new();
        let mut count_of_start_dates_in_the_future = 0;

        for group_ in group {
            assert!(!group_.is_empty());
            let calendar_path = Path::new(SEARCH_DIR).join(group_).join("calendar.txt");
            let Some(start_date) = parse_start_date(&calendar_path) else {
                timetables_without_start_date.push(group_.to_owned());
                continue;
            };
            if start_date > today {
                count_of_start_dates_in_the_future += 1;
            }
            content_map
                .entry(start_date.to_string())
                .or_default()
                .push(group_.to_string());
        }
        if content_map.is_empty() {
            continue;
        }
        grouped_grouped_dirs.push((content_map, count_of_start_dates_in_the_future));
    }

    for (group, _) in &mut grouped_grouped_dirs {
        group.iter().collect::<Vec<_>>().sort_by_key(|dir_name| {
            dir_name.0
        });
    }

    (
        grouped_grouped_dirs
            .into_iter()
            .map(|(hashmap, count_of_start_dates_in_the_future)| {
                assert!(!hashmap.is_empty());
                let mut vec: Vec<(String, Vec<String>)> = hashmap.into_iter().collect::<Vec<_>>();

                assert!(!vec.is_empty());

                vec.sort_by_key(|k| k.0.clone());
                (
                    vec.into_iter().map(|(_, x)| x).collect(),
                    count_of_start_dates_in_the_future,
                )
            })
            .collect(),
        timetables_without_start_date,
    )
}

/// Groups the sub-directories of `SEARCH_DIR` by the normalized content of their
/// `routes.txt`.
///
/// Entries that are not directories, and directories without a `routes.txt`, are skipped.
/// Each returned inner vector holds the names of the directories whose `routes.txt` yields
/// the same string from `read_file_content`; the order of the groups is unspecified.
///
/// # Errors
///
/// Returns an error if `SEARCH_DIR` or one of its entries cannot be read, or if
/// `read_file_content` fails.
fn group_directories_by_routes() -> Result<Vec<Vec<String>>, Box<dyn Error>> {
    let mut dirs_by_routes: HashMap<String, Vec<String>> = HashMap::new();

    for entry in fs::read_dir(SEARCH_DIR)? {
        let dir = entry?.path();
        if !dir.is_dir() {
            continue;
        }

        let routes_file = dir.join("routes.txt");
        if !routes_file.exists() {
            continue;
        }

        let routes_key = read_file_content(&routes_file)?;
        let dir_name = dir.file_name().unwrap().to_string_lossy().into_owned();
        dirs_by_routes.entry(routes_key).or_default().push(dir_name);
    }

    Ok(dirs_by_routes.into_values().collect())
}

/// Determines the start date of a timetable from its calendar files.
///
/// `file_path` is expected to point at the folder's `calendar.txt`. The date is taken from
/// column index 8 of the first data record of that file. If the file cannot be opened or has
/// no usable first record, column index 1 of the first data record of the sibling
/// `calendar_dates.txt` is used instead. (Assumes the standard GTFS column order, where these
/// are `start_date` and `date` — confirm against the feeds.)
///
/// Returns `None` when neither file yields a date.
///
/// # Panics
///
/// Panics if a record cannot be read, lacks the expected column, or the column is not a
/// `%Y%m%d` date.
fn parse_start_date(file_path: &Path) -> Option<NaiveDate> {
    first_record_date(file_path, 8).or_else(|| {
        // Derive the sibling through `Path` instead of string replacement: that works for
        // non-UTF-8 paths and cannot touch anything but the final path component.
        first_record_date(&file_path.with_file_name("calendar_dates.txt"), 1)
    })
}

/// Parses column `column` of the first data record of the CSV file at `file_path` as a
/// `%Y%m%d` date; `None` if the file cannot be opened or has no non-empty first record.
fn first_record_date(file_path: &Path, column: usize) -> Option<NaiveDate> {
    let file = File::open(file_path).ok()?;
    let mut reader = csv::Reader::from_reader(file);

    let record = reader.records().next()?.unwrap();
    if record.is_empty() {
        return None;
    }

    Some(NaiveDate::parse_from_str(&record[column], "%Y%m%d").unwrap())
}

/// Reads a `routes.txt` file and reduces it to a string used as a grouping key.
///
/// All lines are joined without separators and the result is split at the first twelve
/// commas. The remainder after the twelfth comma is dropped, the eleventh piece is blanked,
/// and the remaining pieces are concatenated.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
///
/// # Panics
///
/// Panics if the joined text has fewer than twelve commas, if the third character from the
/// end of the eleventh piece is not `-`, or if the result does not start with the expected
/// header followed by `"-CISR-`.
fn read_file_content(file_path: &Path) -> Result<String, Box<dyn Error>> {
    let reader = BufReader::new(File::open(file_path)?);

    let mut joined = String::new();
    for line in reader.lines() {
        joined.push_str(&line?);
    }

    let mut content: Vec<&str> = joined.splitn(13, ',').collect();
    content.remove(12);
    assert!(
        content[10].chars().nth_back(2).unwrap() == '-',
        "{:#?}",
        dbg!(content)
    );
    content[10] = "";

    let key = content.concat();
    assert!(key.starts_with("route_idagency_idroute_short_nameroute_long_nameroute_descroute_typeroute_urlroute_colorroute_text_colorroute_sort_order\"-CISR-"));
    Ok(key)
}

/// Tells whether a CSV-like file has no data below its header line.
///
/// A missing file counts as empty. Otherwise the first line is skipped and the file is empty
/// when there is no second line, or when the second line is blank and nothing follows it.
///
/// # Panics
///
/// Panics if the file cannot be opened for a reason other than "not found", if the second
/// or third line cannot be read, or if the second and the third line are both blank.
fn is_file_empty(file_path: &Path) -> bool {
    let file = match File::open(file_path) {
        Ok(file) => file,
        Err(e) if e.kind() == ErrorKind::NotFound => return true,
        Err(e) => panic!("{e}"),
    };

    // Skip the header line; only the two lines after it decide the outcome.
    let mut data_lines = BufReader::new(file).lines().skip(1);

    let Some(second) = data_lines.next() else {
        return true;
    };
    if !second.unwrap().trim().is_empty() {
        return false;
    }

    match data_lines.next() {
        None => true,
        Some(third) => {
            if third.unwrap().trim().is_empty() {
                panic!();
            }
            false
        }
    }
}

/// Extracts the six-character line number from the first data row of a folder's
/// `stop_times.txt`.
///
/// The first comma-separated field of that row is split on `-`; it must consist of four
/// parts, the first being `CIST` (optionally preceded by a double quote) and the second
/// being the number that is returned.
///
/// # Panics
///
/// Panics if the file cannot be opened or read, has no second line, or the field does not
/// have the shape described above.
fn get_line_from_folder(file_path: &Path) -> i32 {
    let file = File::open(file_path.join("stop_times.txt")).unwrap();

    let Some(first_row) = BufReader::new(file).lines().nth(1) else {
        panic!()
    };
    let first_row = first_row.unwrap();

    let (first_field, _) = first_row.split_once(',').unwrap();
    let parts: Vec<&str> = first_field.split('-').collect();

    assert!(parts.len() == 4);
    assert!(matches!(parts[0], "CIST" | "\"CIST"));
    assert!(parts[1].len() == 6);

    parts[1].parse().unwrap()
}

fn are_two_folders_identical(group: &Vec<String>, i: usize, j: usize) -> bool {
    let output = Command::new("git")
        .args([
            "-P",
            "diff",
            "--quiet",
            "--no-index",
            Path::new(SEARCH_DIR).join(&group[i]).to_str().unwrap(),
            Path::new(SEARCH_DIR).join(&group[j]).to_str().unwrap(),
        ])
        .envs(HashMap::from([
            // https://superuser.com/a/1255922
            ("GIT_CONFIG_NOSYSTEM", "1"),
            ("GIT_CONFIG_NOGLOBAL", "1"),
            ("HOME", ""),
            ("XDG_CONFIG_HOME", ""),
        ]))
        .output()
        .expect("Failed to execute command");

    let status: i32 = output.status.code().unwrap_or(-1);
    let stdout = String::from_utf8(output.stdout).unwrap();
    let stderr = String::from_utf8(output.stderr).unwrap();

    if !stderr.is_empty() || !stdout.is_empty() || status > 1 || status < 0 {
        panic!(
            "{} {} {} {group:#?} {} {}",
            dbg!(stderr),
            dbg!(stdout),
            dbg!(status),
            group[i],
            group[j]
        );
    }

    status == 0
}

/// Returns the agency name recorded in a folder's `agency.txt`.
///
/// If the file lists one agency, or several rows that all carry the same name, that name is
/// returned. If the rows carry different names the placeholder `"_-_-_"` is returned.
///
/// # Panics
///
/// Panics if the file cannot be read or parsed, contains no agency, lists one name with
/// more than two distinct URLs, or lists several names of which one appears in
/// `AGENCIES_TO_REMOVE` or `AGENCIES_TO_REMOVE_FOLDERS`.
fn get_agency_name_from_folder(file_path: &Path) -> String {
    let mut rdr = csv::Reader::from_path(file_path.join("agency.txt")).unwrap();

    let agencies: Vec<(String, String)> = rdr
        .deserialize::<Agency>()
        .map(|row| {
            let record = row.unwrap();
            (record.agency_name, record.agency_url)
        })
        .collect();

    assert!(!agencies.is_empty());
    if agencies.len() == 1 {
        return agencies[0].0.clone();
    }

    let distinct_names: HashSet<&str> = agencies.iter().map(|(name, _)| name.as_str()).collect();
    let distinct_urls: HashSet<&str> = agencies.iter().map(|(_, url)| url.as_str()).collect();

    if distinct_names.len() == 1 {
        assert!(distinct_urls.len() <= 2, "{agencies:#?} {file_path:?}");
        return agencies[0].0.clone();
    }

    // A folder mixing several agencies must not involve any agency this tool removes.
    for (name, _) in &agencies {
        assert!(!AGENCIES_TO_REMOVE.contains(&name.as_str()));
        assert!(!AGENCIES_TO_REMOVE_FOLDERS.contains(&name.as_str()));
    }
    "_-_-_".to_owned()
}

/// Returns the `route_long_name` found in a folder's `routes.txt`.
///
/// # Panics
///
/// Panics if the file cannot be read or parsed, if a further row follows after a non-empty
/// long name has been read, or if no non-empty long name is found.
fn get_route_long_name_from_folder(file_path: &Path) -> String {
    let routes_path = file_path.join("routes.txt");
    let mut rdr = csv::Reader::from_path(routes_path).unwrap();

    let mut route_long_name = String::new();
    for row in rdr.deserialize::<Routes>() {
        // Reaching another row after a name was captured means the file has several routes.
        assert!(route_long_name.is_empty());
        route_long_name = row.unwrap().route_long_name;
    }

    assert!(!route_long_name.is_empty());
    route_long_name
}

/// Loads all rows of a folder's `stop_times.txt`, renumbering `stop_sequence` so that it
/// counts up from zero within each run of rows sharing a `trip_id`.
///
/// # Panics
///
/// Panics if the file cannot be read or parsed, if a `stop_sequence` value repeats within
/// such a run, or if an original `stop_sequence` is smaller than the number of rows already
/// seen in its run.
fn get_stop_times_from_folder(file_path: &Path) -> Vec<StopTimes> {
    let mut rdr = csv::Reader::from_path(file_path.join("stop_times.txt")).unwrap();

    let mut stop_times = Vec::new();
    // State of the run of rows that is currently being read.
    let mut last_trip_id = String::new();
    let mut last_stop_sequence = 0;
    let mut seen_sequences = HashSet::new();
    // Previous row after renumbering; only used in the assertion message below.
    let mut last_record = StopTimes::default();

    for row in rdr.deserialize::<StopTimes>() {
        let mut record = row.unwrap();

        let starts_new_trip = record.trip_id != last_trip_id;
        if starts_new_trip {
            last_trip_id.clone_from(&record.trip_id);
            last_stop_sequence = 0;
            seen_sequences.clear();
        }

        assert!(
            seen_sequences.insert(record.stop_sequence),
            "{file_path:?} {record:?}"
        );
        assert!(
            record.stop_sequence >= last_stop_sequence,
            "{record:#?} {file_path:#?} {last_stop_sequence:#?} {last_record:#?}"
        );

        record.stop_sequence = last_stop_sequence;
        last_stop_sequence += 1;
        last_record.clone_from(&record);

        stop_times.push(record);
    }

    stop_times
}

/// Returns the names of all stops of a folder's `stops.txt` that are referenced by its
/// `stop_times.txt`, in the order in which they appear in `stops.txt`.
///
/// # Panics
///
/// Panics if either file cannot be read or parsed, or if any stop name in `stops.txt`
/// contains more than two commas.
fn get_stop_names_from_folder(file_path: &Path) -> Vec<String> {
    let mut stop_times_rdr = csv::Reader::from_path(file_path.join("stop_times.txt")).unwrap();
    let used_stop_ids: HashSet<String> = stop_times_rdr
        .deserialize::<StopTimes>()
        .map(|row| row.unwrap().stop_id)
        .collect();

    let mut stops_rdr = csv::Reader::from_path(file_path.join("stops.txt")).unwrap();
    stops_rdr
        .deserialize::<Stops>()
        .filter_map(|row| {
            let stop = row.unwrap();
            // Checked for every stop, including the ones that are not used.
            assert!(stop.stop_name.split(',').count() <= 3);
            used_stop_ids
                .contains(&stop.stop_id)
                .then_some(stop.stop_name)
        })
        .collect()
}

/// Converts an `H:M:S` time string into seconds since midnight, truncated to the minute and
/// wrapped to a single day.
///
/// The seconds field must be present and numeric but does not contribute to the result.
/// Hours of 24 or more wrap around, e.g. `25:10:00` yields the same value as `01:10:00`.
///
/// # Panics
///
/// Panics if the string does not consist of exactly three `:`-separated unsigned numbers.
fn parse_time(time_str: &str) -> u32 {
    const SECONDS_PER_DAY: u32 = 60 * 60 * 24;

    let mut parts = time_str.split(':');
    let mut next_field = || -> u32 { parts.next().unwrap().parse().unwrap() };

    let hours = next_field();
    let minutes = next_field();
    // Parsed only to validate the field; the result deliberately ignores it.
    let _seconds = next_field();

    assert!(parts.next().is_none(), "Invalid time format");

    (hours * 3600 + minutes * 60) % SECONDS_PER_DAY
}

/// Returns the process-wide regex matching a decimal number written with `,` or `.` as the
/// separator (digits, separator, digits).
///
/// The regex is compiled on first use and reused afterwards.
fn regex_number() -> &'static Regex {
    static DECIMAL_NUMBER: OnceLock<Regex> = OnceLock::new();
    let compile = || Regex::new(r"\d+(,|\.)\d+").unwrap();
    DECIMAL_NUMBER.get_or_init(compile)
}

/// Returns the process-wide regex matching the shortest `[...]` span, brackets included.
///
/// The regex is compiled on first use and reused afterwards.
fn regex_brackets() -> &'static Regex {
    static BRACKETED: OnceLock<Regex> = OnceLock::new();
    let compile = || Regex::new(r"\[.*?\]").unwrap();
    BRACKETED.get_or_init(compile)
}

/// Loads the timetable stored in the folder `file_path` and groups its stop
/// times by route.
///
/// Reads `routes.txt`, `stop_times.txt`, `trips.txt` and `stops.txt` from the
/// folder and returns one tuple per accepted route:
///
/// * `.0` – numeric line number: parsed from the route id for ids starting
///   with `L`, otherwise parsed from `route_short_name`,
/// * `.1` – `route_long_name`,
/// * `.2` – `route_id`,
/// * `.3` – the stop times of all trips of the route, each trip sorted by
///   `stop_sequence` and renumbered to `0, 1, 2, …`,
/// * `.4` – the set of stop names visited by those trips.
///
/// Routes whose `route_type` is `2` are skipped. Routes without an `L` prefix
/// whose short name is not numeric are skipped with a warning on stdout.
///
/// # Panics
///
/// Panics if a file cannot be read or deserialized, or if any of the
/// consistency checks below fails (unexpected route type, route id / short
/// name mismatch, duplicate stop id, duplicate stop sequence inside a trip,
/// a route without trips, a trip without stop times, an unknown stop id).
fn function_idk(file_path: &Path) -> Vec<(u32, String, String, Vec<StopTimes>, HashSet<String>)> {
    // Part of an `L…` route id between the leading `L` and the next `L`/`D`
    // (or the end of the id when there is none).
    fn id_number(route_id: &str) -> &str {
        route_id
            .split(['L', 'D'])
            .nth(1)
            .expect("caller checked that route_id starts with 'L'")
    }

    let mut rdr = csv::Reader::from_path(file_path.join("routes.txt")).unwrap();
    let mut transport_lines_in_ids_jmk = Vec::new();

    for ii in rdr.deserialize() {
        let record: Routes = ii.unwrap();

        if ![0, 3, 11, 800, 1, 4].contains(&record.route_type) {
            // The only other route type expected in the data is 2; it is ignored.
            assert!(record.route_type == 2, "{record:?}");
            continue;
        }

        if record.route_id.starts_with('L') {
            let id = id_number(&record.route_id);

            if record.route_id.contains('D') {
                // `L<number>D<…>` ids: the number must equal the short name,
                // except for a few known short-name prefixes.
                let exempt = record
                    .route_short_name
                    .starts_with(['x', 'P', 'E', 'š', 'N'])
                    || record.route_short_name.starts_with("OLY");
                if !exempt {
                    assert!(id == record.route_short_name, "{record:?}");
                }
            } else {
                // `L<number>` ids: the short name must equal the last three
                // characters of the number, unless it is six bytes long or
                // carries one of the known prefixes.
                let exempt = record
                    .route_short_name
                    .starts_with(['A', 'B', 'C', 'X', 'P'])
                    || record.route_short_name.starts_with("IKEA")
                    || record.route_short_name.starts_with("MHD")
                    || record.route_short_name.starts_with("ŠKOLNÍ");
                if !exempt {
                    let short_name_len = record.route_short_name.len();
                    assert!(
                        (1..=3).contains(&short_name_len) || short_name_len == 6,
                        "{record:?}"
                    );
                    assert!(
                        short_name_len == 6
                            || id.get(id.len().saturating_sub(3)..).unwrap()
                                == record.route_short_name,
                        "{record:?}"
                    );
                }
            }

            let number = id.parse::<u32>().unwrap_or_else(|err| {
                panic!(
                    "route_id {:?} has no numeric line part: {err}",
                    record.route_id
                )
            });
            transport_lines_in_ids_jmk.push((number, record.route_long_name, record.route_id));
        } else {
            // Ids without the `L` prefix must be plain numbers.
            assert!(!record.route_id.contains('D'), "{record:?}");
            assert!(record.route_id.parse::<u32>().is_ok(), "{record:?}");

            if let Ok(short_name) = record.route_short_name.parse::<u32>() {
                transport_lines_in_ids_jmk.push((
                    short_name,
                    record.route_long_name,
                    record.route_id,
                ));
            } else {
                println!(
                    "WARNING: route_short_name is not a number  path: {file_path:?} record: {record:?}"
                );
            }
        }
    }

    // trip_id -> stop times of that trip (in file order for now).
    let mut rdr = csv::Reader::from_path(file_path.join("stop_times.txt")).unwrap();
    let mut stop_times_in_ids_jmk: HashMap<String, Vec<StopTimes>> = HashMap::new();

    for ii in rdr.deserialize() {
        let record: StopTimes = ii.unwrap();

        assert!(!record.trip_id.is_empty());
        stop_times_in_ids_jmk
            .entry(record.trip_id.clone())
            .or_default()
            .push(record);
    }

    // route_id -> trip ids of that route.
    let mut rdr = csv::Reader::from_path(file_path.join("trips.txt")).unwrap();
    let mut trips_in_ids_jmk: HashMap<String, Vec<String>> = HashMap::new();

    for ii in rdr.deserialize() {
        let record: Trips = ii.unwrap();

        assert!(!record.route_id.is_empty());
        trips_in_ids_jmk
            .entry(record.route_id)
            .or_default()
            .push(record.trip_id);
    }

    // stop_id -> stop_name.
    let mut rdr = csv::Reader::from_path(file_path.join("stops.txt")).unwrap();
    let mut stops: HashMap<String, String> = HashMap::new();

    for ii in rdr.deserialize() {
        let record: Stops = ii.unwrap();

        assert!(!record.stop_id.is_empty());
        // Decimal numbers such as `1,5` are removed first so that their comma
        // is not counted as a separator.
        assert!(
            regex_number()
                .replace_all(&record.stop_name, "")
                .split(',')
                .count()
                <= 4,
            "{file_path:?} {record:#?}"
        );
        if stops
            .insert(record.stop_id.clone(), record.stop_name)
            .is_some()
        {
            panic!(
                "{file_path:?}: duplicate stop_id {:?} in stops.txt",
                record.stop_id
            );
        }
    }

    let mut transport_lines = Vec::with_capacity(transport_lines_in_ids_jmk.len());
    for (line_number, route_long_name, route_id) in transport_lines_in_ids_jmk {
        let mut stop_times: Vec<StopTimes> = Vec::new();
        let mut stops_names = HashSet::new();
        let trip_ids = trips_in_ids_jmk
            .get(&route_id)
            .unwrap_or_else(|| panic!("{file_path:?}: route {route_id:?} has no trips"));

        for trip_id in trip_ids {
            let trip_stop_times = stop_times_in_ids_jmk
                .get_mut(trip_id)
                .unwrap_or_else(|| panic!("{file_path:?}: trip {trip_id:?} has no stop times"));
            trip_stop_times.sort_by_key(|k| k.stop_sequence);

            let mut last_stop_sequence = 0;
            let mut last_record = StopTimes::default();
            let mut stop_sequences_hashset = HashSet::new();
            for bb in trip_stop_times.iter_mut() {
                if !stop_sequences_hashset.insert(bb.stop_sequence) {
                    panic!("{file_path:?}: duplicate stop_sequence in trip {trip_id:?}: {bb:#?}");
                }
                assert!(
                    bb.stop_sequence >= last_stop_sequence,
                    "{bb:#?} {last_record:#?}"
                );

                // Renumber so that every trip uses the dense sequence 0, 1, 2, …
                bb.stop_sequence = last_stop_sequence;
                last_stop_sequence += 1;
                last_record = bb.clone();

                let stop_name = stops.get(&bb.stop_id).unwrap_or_else(|| {
                    panic!("{file_path:?}: unknown stop_id {:?}", bb.stop_id)
                });
                stops_names.insert(stop_name.to_owned());
            }

            // Moves the stop times out of the map; a trip id that is listed
            // again later contributes nothing more.
            stop_times.extend(std::mem::take(trip_stop_times));
        }

        transport_lines.push((
            line_number,
            route_long_name,
            route_id,
            stop_times,
            stops_names,
        ));
    }
    transport_lines
}

fn function_idk_2(
    ii: &str,
    transport_lines_in_ids_jmk_: &[(u32, String, String, Vec<StopTimes>, HashSet<String>)],
    agency_name: &str,
) -> Vec<(i32, String)> {
    let mut lines_to_remove = Vec::new();

    let line = get_line_from_folder(&Path::new(SEARCH_DIR).join(ii));
    let line_short = line
        .to_string()
        .get(line.to_string().len().saturating_sub(3)..)
        .unwrap()
        .parse()
        .unwrap();
    println!("{line} {agency_name}");
    let mut filter__ = transport_lines_in_ids_jmk_
        .iter()
        .filter(|x| x.0 == line_short);

    let mut is_duplicate_line = false;
    let mut filter;
    if let Some(first) = filter__.next() {
        filter = first.clone();

        for next in filter__ {
            is_duplicate_line = true;
            assert!(filter.0 == next.0);
            assert!(filter.1 == next.1);
            filter.3.extend(next.3.clone());
        }
    } else {
        println!("{line} {agency_name} wef");
        return lines_to_remove;
    }
    assert!(!filter.3.is_empty());
    assert!(!filter.4.is_empty());
    if is_duplicate_line {
        println!("WARNING: duplicate line in folder {ii}  line: {line}");
    }

    let a = route_long_names_distance(
        &filter.1,
        &get_route_long_name_from_folder(&Path::new(SEARCH_DIR).join(ii)),
    );

    println!(
        "{line} '{agency_name}' {a} '{}' '{}' nwer",
        filter.1,
        get_route_long_name_from_folder(&Path::new(SEARCH_DIR).join(ii))
    );

    if !((a < 30
        && a as f64 * 1.3 < filter.1.chars().count() as f64
        && a as f64 * 1.3
            < get_route_long_name_from_folder(&Path::new(SEARCH_DIR).join(ii))
                .chars()
                .count() as f64)
        || filter.1.trim().is_empty()
        || get_route_long_name_from_folder(&Path::new(SEARCH_DIR).join(ii))
            .trim()
            .is_empty())
    {
        println!(
            "WARNING: route_long_name big difference {line} '{agency_name}' {a} '{}' '{}'",
            filter.1,
            get_route_long_name_from_folder(&Path::new(SEARCH_DIR).join(ii))
        );
    }

    let stop_times = &filter.3;
    let stop_times_ = get_stop_times_from_folder(&Path::new(SEARCH_DIR).join(ii));
    assert!(!stop_times.is_empty(), "{filter:#?}");
    assert!(!stop_times_.is_empty());
    let mut stop_times_hashmap = HashSet::with_capacity(stop_times.len());
    let mut stop_times_hashmap_2 = HashSet::with_capacity(stop_times.len());
    let mut stop_times_hashmap_3 = HashSet::with_capacity(stop_times.len());
    for i in stop_times {
        assert!((7..=9).contains(&i.arrival_time.len()), "{i:#?}");
        assert!((7..=9).contains(&i.departure_time.len()), "{i:#?}");
        let arrival_time = parse_time(i.arrival_time.trim());
        let departure_time = parse_time(i.departure_time.trim());
        if arrival_time % 60 != 0 || departure_time % 60 != 0 {
            assert!(departure_time % 60 != 0, "{arrival_time} {departure_time}");
        }
        stop_times_hashmap.insert((arrival_time, i.stop_sequence));
        stop_times_hashmap_2.insert(arrival_time);
        stop_times_hashmap_3.insert(departure_time);
    }
    assert!(!stop_times_hashmap.is_empty());
    assert!(
        stop_times_hashmap.len() >= stop_times_hashmap_2.len(), // && stop_times_hashmap_2.len() == stop_times_hashmap_3.len(),
        "{} {} {} \n\n {:?} \n {:?} \n {:?}",
        stop_times_hashmap.len(),
        stop_times_hashmap_2.len(),
        stop_times_hashmap_3.len(),
        stop_times_hashmap,
        stop_times_hashmap_2,
        stop_times_hashmap_3
    );

    let mut count_stop_times = 0;
    let mut count_equal = 0;
    let mut count_almost_equal = 0;
    let mut count_not_equal = 0;
    for i in stop_times_ {
        count_stop_times += 1;

        assert!((7..=9).contains(&i.arrival_time.len()), "{i:#?}");
        assert!((7..=9).contains(&i.departure_time.len()), "{i:#?}");

        let time = parse_time(&i.arrival_time);

        if stop_times_hashmap.contains(&(time, i.stop_sequence)) {
            count_equal += 1;
        } else if stop_times_hashmap_2.contains(&(time + 60))
            || stop_times_hashmap_2.contains(&(time - 60))
            || stop_times_hashmap_3.contains(&(time))
        {
            count_almost_equal += 1;
        } else {
            count_not_equal += 1;
        }

        continue;
        for iii in &filter.3 {
            println!("{} {}", iii.arrival_time, iii.departure_time);
        }
        println!("-------------");
        for iii in get_stop_times_from_folder(&Path::new(SEARCH_DIR).join(ii)) {
            println!("{} {}", iii.arrival_time, iii.departure_time);
        }

        println!("{line}");
        println!("{i:#?}");
        panic!();
    }

    let mut count_stop_name_equal = 0;
    let mut count_stop_name_almost_equal = 0;
    let mut count_stop_name_not_equal = 0;

    let mut stop_names__ = Vec::new();
    for i in get_stop_names_from_folder(&Path::new(SEARCH_DIR).join(ii)) {
        let distance: usize = {
            let mut min_distance = usize::MAX;
            for x in &filter.4 {
                min_distance = usize::min(min_distance, stop_names_distance(x, &i));
                min_distance = usize::min(
                    min_distance,
                    stop_names_distance(
                        x,
                        i.split(',')
                            .collect::<Vec<&str>>()
                            .get(2)
                            .unwrap_or(&i.split(',').last().unwrap()),
                    ),
                );
                if min_distance == 0 {
                    break;
                }
            }
            min_distance
        };
        if filter.4.iter().any(|x| {
            normalize_stop_name(x) == normalize_stop_name(&i)
                || normalize_stop_name(x)
                    == normalize_stop_name(
                        i.split(',')
                            .collect::<Vec<&str>>()
                            .get(2)
                            .unwrap_or(&i.split(',').last().unwrap()),
                    )
                || match i.split_once(',') {
                    Some(v) => normalize_stop_name(x) == normalize_stop_name(v.1),
                    None => false,
                }
                || distance == 0
        }) {
            count_stop_name_equal += 1;
            stop_names__.push((i, "equal"));
        } else if filter.4.iter().any(|x| {
            levenshtein(x, &i) < 5
                && x.len() as f64 > (levenshtein(x, &i) + 1) as f64 * 1.6
                && i.len() as f64 > (levenshtein(x, &i) + 1) as f64 * 1.6
                && println!("{x} {i} {}", levenshtein(x, &i)).cmp(&()).is_eq()
        }) || distance == 1
        {
            count_stop_name_almost_equal += 1;
            stop_names__.push((i, "almost equal"));
        } else {
            count_stop_name_not_equal += 1;
            stop_names__.push((i, "not equal"));
        }
    }

    if count_not_equal * 60 >= count_equal && count_almost_equal * 5 >= count_equal {

        println!(
            "{} {} {} {} {} no {}",
            count_stop_times, count_equal, count_almost_equal, count_not_equal, line, agency_name
        );
        if count_stop_name_not_equal * 2 < count_stop_name_equal {
            println!("WARNING: something is weird with line : {line}  stop_names: {count_stop_name_equal}/{count_stop_name_almost_equal}/{count_stop_name_not_equal}");
        }

    } else {

        if count_stop_name_not_equal * 2 < count_stop_name_equal {
            println!(
                "{} {} {} {} {} ok {} {} {}",
                count_stop_times,
                count_equal,
                count_almost_equal,
                count_not_equal,
                line,
                agency_name,
                stop_times.len(),
                stop_times
                    .iter()
                    .map(|x| &x.arrival_time)
                    .collect::<HashSet<_>>()
                    .len()
            );
            lines_to_remove.push((line, ii.to_owned()));
        } else {
            if count_stop_name_not_equal < count_stop_name_equal
                || count_equal > 100
                || count_stop_name_equal > 0
            {
                println!("WARNING: something is weird with line : {line}");
            }
            println!(
                "{} {} {} {} {} ok/no {} {} {} {} {} {}\nbingo: {:#?} {:#?}",
                count_stop_times,
                count_equal,
                count_almost_equal,
                count_not_equal,
                line,
                agency_name,
                stop_times.len(),
                stop_times
                    .iter()
                    .map(|x| &x.arrival_time)
                    .collect::<HashSet<_>>()
                    .len(),
                count_stop_name_equal,
                count_stop_name_almost_equal,
                count_stop_name_not_equal,
                stop_names__,
                filter.4
            );

            assert!(count_stop_name_not_equal > count_stop_name_almost_equal, "{line}  stop_names: {count_stop_name_equal}/{count_stop_name_almost_equal}/{count_stop_name_not_equal}\n");
        }

    }

    lines_to_remove
}

/// Verifies that the `i32` id of every inner entry is unique across all
/// groups in `vec_of_vecs`.
///
/// Always returns `false` when it returns at all: a repeated id is not
/// reported through the return value but aborts with a panic.
///
/// # Panics
///
/// Panics with the offending `(id, name)` entry as soon as an id is seen a
/// second time.
fn has_duplicates(vec_of_vecs: &Vec<(String, Vec<(i32, String)>)>) -> bool {
    let mut seen_ids = HashSet::new();

    vec_of_vecs
        .iter()
        .flat_map(|(_, entries)| entries.iter())
        .for_each(|entry| assert!(seen_ids.insert(entry.0), "{entry:#?}"));

    false
}

/// Recursively deletes the directory at `path`, unless `DRY_RUN` is set, in
/// which case nothing happens and nothing is printed.
///
/// The `deleted: …` line is written to stdout before the removal is attempted.
///
/// # Panics
///
/// Panics if the directory cannot be removed.
fn remove_folder(path: PathBuf) {
    if DRY_RUN {
        return;
    }

    println!("deleted: {path:?}");
    fs::remove_dir_all(path).unwrap();
}
