import React, { ReactNode } from "react";
import { Link } from "carbon-components-react";

/*
"26129f2843278326d44120629de07e9d": <></>, // Overview
"a15ba2cc90ee038354ac67e50df397d5": <></>, // Purpose
"c9b05afcdf7f8ac774373f8cba2e0e16": <></>, // Intended Domain
"77652a3f7be7955568d33bc316565959": <></>, // Training Data
"92fbaf8c3ec0ce87558c745850bd0cd6": <></>, // Model Information
"682b5d2757e1d4f4999c7cb4174eecdb": <></>, // Inputs and Outputs
"072d8ff90f49eed9f6acf05ee7531549": <></>, // Performance Metrics
"02f030fc5e6a3117d18eca609db814e4": <></>, // Test Data
"28bad280c2108fda9b5b394e35602907": <></>, // Optimal Conditions
"bbc08b597aacb31b7c420b696ee4d9f4": <></>, // Poor Conditions
"4d831fc91b16e380933bda777363cdc5": <></>, // Explanation
*/

/**
 * Two-level, string-keyed lookup table.
 *
 * The outer key is a model id and the inner key is a fact-type id; each leaf
 * is a renderable React node.
 */
type Rationales = Record<string, Record<string, ReactNode>>;

// Shared fragment for the audio classifier; referenced by key from the
// `rationales` table below instead of being repeated inline.
// The text is split at sentence boundaries only: JSX joins the trimmed lines
// with a single space, so the rendered string is unchanged.
const audio_optimal_and_poor = (
    <>
        The information provided in this and the previous section is quite generic; the general assumption that any model performs better with higher quality input and input closer to the training dataset is not surprising.
        Poor conditions are just the opposite of optimal conditions for now.
        It might be helpful to provide additional details if known, such as how well the classifier performs with particular kinds of noise in the training data or input signal.
        This could allow the reader to judge whether the classifier was suitable for the kinds of data they had.
    </>
);

/**
 * Rationale content, keyed first by model id and then by fact-type id.
 *
 * The fact-type ids are opaque hashes; the section each one stands for is
 * listed in the legend comment near the top of this file and repeated as a
 * label above every entry here.
 *
 * Links that open in a new tab carry `rel="noopener noreferrer"` so the
 * opened page cannot reach back through `window.opener`.
 *
 * NOTE(review): "max_audio_classifier" has no entry for Overview
 * (26129f2843278326d44120629de07e9d) or Optimal Conditions
 * (28bad280c2108fda9b5b394e35602907); the shared `audio_optimal_and_poor`
 * fragment is attached to Poor Conditions only. Confirm this is intended.
 */
export const rationales: Rationales = {
    "max_audio_classifier": {
        // Purpose
        "a15ba2cc90ee038354ac67e50df397d5": <>We wanted to be sure the reader could get the information that was needed about the model from just this one section. The first two or three lines should be sufficient to convey the purpose. We also included a summary of relevant information in the second paragraph. It wasn’t too difficult to gather the information presented here. Information would also be well suited to abstract or summary of the model.<br/>
        This section tells the user what to expect from the model. It is the one section we expect the user to read and take home, even if they don't read any of the remaining sections. </>,
        // Intended Domain
        "c9b05afcdf7f8ac774373f8cba2e0e16": <>What we wanted to convey here is where the model can likely be applied. There are some surprising difficulties in answering this question. What is the meaning of the word "domain"? Is the audio classification problem a domain as opposed to, say, a video classification problem? This might be an interesting distinction from a more academic or research perspective. Someone else might view a more important distinction as having a solution focus like "manufacturing" as opposed to "retail".
        <br/><br/>
        Perhaps this could be simplified for the FactSheet writer by providing something like a hierarchy of domains. This could structure the space, making it more concrete and understandable.</>,
        // Training Data
        "77652a3f7be7955568d33bc316565959": <>We hoped to convey that a lot of data went into model training, covering a wide variety of classes. Since the dataset is not well known outside of audio classification/processing circles, it was also important to provide some detail. But it is hard to know how to summarize this in terms of things like distributions given how large the dataset is and how many classes it contains. Rather than putting all of this into the FactSheet itself, links to external information about the dataset may be the best approach.</>,
        // Model Information
        "92fbaf8c3ec0ce87558c745850bd0cd6": <>It is hard to convey how models are structured in plain English. Understanding this requires a pretty deep understanding of ML. Talking about "attention" layers and "embeddings", for example, won’t be too helpful otherwise. This was particularly difficult for this FactSheet since we are actually talking about two models which are big projects in themselves; this section discusses both of them. We attempted to give a "big picture" overview in a way that even a novice might understand. And while the FactSheet doesn’t contain enough information for those who would replicate or extend the models, it probably suffices for the sort of catalog description about what is available in the Model Asset Exchange (MAX).
         <br/><br/>
        Overall, it is difficult to find the right balance between explaining underlying concepts and algorithms while still being mindful of keeping things short enough that readers aren’t put off. FactSheet readers will also have widely varying backgrounds - data scientists, software engineers, novices and so on - further complicating the task of crafting the perfect two-paragraph summary.
        <br/><br/>
        Finally, we wondered if it might be important to add information on how the model was trained since it would give a sense of whether/how/how long /how many iterations might be needed to train on a different dataset. This, along with the resulting quality/metrics/performance, might be good to have in this model information section rather than somewhere else.</>,
        // Inputs and Outputs
        "682b5d2757e1d4f4999c7cb4174eecdb": <>The information provided here is pretty much a copy of the API spec that we already have on the associated MAX information page. We didn’t want to add too much or change this as this information will likely be used by software engineers who are comfortable with this sort of presentation. However, we did make explicit here how audio clips shorter or longer than 10 seconds are handled.</>,
        // Performance Metrics
        "072d8ff90f49eed9f6acf05ee7531549": <>This includes generic metrics such as Mean Average Precision and Area Under the Curve. It also includes one metric, d', that is typically used with audio/signal detection. This mix should cater to both specialists in signal detection and non-specialist data scientists and developers.
        <br/><br/>
        It isn’t clear how to approach questions of fairness and bias. This might require transforming the data to make it possible to use algorithms such as those in the <Link href="https://aif360.res.ibm.com/" target="_blank" rel="noopener noreferrer" className="font-size-16">AI Fairness 360 Toolkit</Link>. But this also might require a lot of different experiments so might not be worth the effort here.
        <br/><br/>
        Finally, it might be useful to see how a model compares to state of the art. This is included in the longer paper describing the audio classifier but might not be useful in a FactSheet.</>,
        // Test Data
        "02f030fc5e6a3117d18eca609db814e4": <>Describing the test data is fairly straightforward. In general, if test data is randomly drawn from the same dataset as the training data, it is probably sufficient to just describe the split percentages which is what we do here.</>,
        // Poor Conditions
        "bbc08b597aacb31b7c420b696ee4d9f4": audio_optimal_and_poor,
        // Explanation
        "4d831fc91b16e380933bda777363cdc5": <>The audio classifier does not provide explanations of its classifications. But just stating this may not be entirely helpful. Providing hints here of how to analyze particular problem cases might make sense. For example, if it performed poorly, looking at the training data for clues would be one approach. It might also be possible to explore this a bit more by using the capabilities of the <Link href="https://aix360.res.ibm.com/" target="_blank" rel="noopener noreferrer" className="font-size-16">AI Explainability 360 Toolkit</Link></>,
    }
};