so-un-bot/data/questions/fds.json

2693 lines
116 KiB
JSON
Raw Normal View History

2024-10-18 18:18:03 +02:00
[
{
"quest": "What class does the Naive Bayes classifier predict for a given observation?",
"answers": [
{
"text": "The class maximizing the joint predictors probability",
"image": ""
},
{
"text": "The class minimizing the joint predictors probability",
"image": ""
},
{
"text": "The class maximizing the joint predictors/labels probability",
"image": ""
},
{
"text": "The class minimizing the joint predictors/labels probability",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "If your dataset has two variables \ud835\udc99, \ud835\udc99\u2032 such that \ud835\udc99 = \ud835\udc82 \u22c5 \ud835\udc99\u2032 for some constant a > 0, then you have:",
"answers": [
{
"text": "overfitting",
"image": ""
},
{
"text": "underfitting",
"image": ""
},
{
"text": "multicollinearity",
"image": ""
},
{
"text": "supercollinearity",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "A na\u00efve Bayes classifier can deal with previously unseen feature-label combination through:",
"answers": [
{
"text": "Laplacian smoothing",
"image": ""
},
{
"text": "Bootstrapping",
"image": ""
},
{
"text": "Stratified cross-validation",
"image": ""
},
{
"text": "Repeated sampling",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "For a linear regression model, the expected squared error can be decomposed in:",
"answers": [
{
"text": "Variance and covariance",
"image": ""
},
{
"text": "SSE and SST",
"image": ""
},
{
"text": "Underfit and overfit",
"image": ""
},
{
"text": "Bias, variance and noise",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "What is the key assumption of the Na\u00efve Bayes Classifier?",
"answers": [
{
"text": "The predictors and labels are independent",
"image": ""
},
{
"text": "Each predictor follows a Gaussian distribution",
"image": ""
},
{
"text": "The predictors are independent conditionally on the label",
"image": ""
},
{
"text": "The number of predictors is at most poly(n)",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Which one of the following performance indicates the best model for prediction?",
"answers": [
{
"text": "R\u00b2 = 0.2 on training, R\u00b2 = 0.1 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.7 on training, R\u00b2 = 0.7 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.8 on training, R\u00b2 = 0.1 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.9 on training, R\u00b2 = \u22120.9 on test",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "You want to predict the market price of a team\u2019s merchandising (t-shirts, hats..), according to the team\u2019s seasonal performance. You suggest using:",
"answers": [
{
"text": "Linear regression",
"image": ""
},
{
"text": "Logistic regression",
"image": ""
},
{
"text": "Linear programming",
"image": ""
},
{
"text": "Clustering",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "When is the accuracy a misleading classifier performance measure?",
"answers": [
{
"text": "When the population label proportions are unbalanced",
"image": ""
},
{
"text": "When the population label proportions are balanced",
"image": ""
},
{
"text": "When the sensitivity is high",
"image": ""
},
{
"text": "When the specificity is low",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "The goal of linear regression is to?",
"answers": [
{
"text": "Make America great again",
"image": ""
},
{
"text": "Group similar observations together",
"image": ""
},
{
"text": "Learn a linear function from data",
"image": ""
},
{
"text": "Evaluate the amount of noise in the data",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "In the bias-variance decomposition of the expected squared error, what does high bias suggest?",
"answers": [
{
"text": "Noisy data",
"image": ""
},
{
"text": "Overfitting",
"image": ""
},
{
"text": "Underfitting",
"image": ""
},
{
"text": "Crossfitting",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Social network users often form communities according to their tastes. If you had access to their personal data, you may verify this intuition by:",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear programming",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "R^2 is a measure of:",
"answers": [
{
"text": "Reliability of predictions",
"image": ""
},
{
"text": "Goodness of fit",
"image": ""
},
{
"text": "Significance of estimates",
"image": ""
},
{
"text": "Model complexity",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "A company wants to relate the monthly revenue to productivity parameters such as total number of working hours, etc. They could use:",
"answers": [
{
"text": "Linear regression",
"image": ""
},
{
"text": "Logistic regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear programming",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "How do you perform a linear regression in R?",
"answers": [
{
"text": "lm(y ~ x, data)",
"image": ""
},
{
"text": "lm(y ~ x, data, family = \u201cbinomial\u201d)",
"image": ""
},
{
"text": "predict(y ~ x, data)",
"image": ""
},
{
"text": "predict(y ~ x, data, binomial)",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Your friend proposes to cluster 300 observations by trying all possible clustering and taking the one that minimizes intra cluster variance. You observe that:",
"answers": [
{
"text": "This is the only possible approach",
"image": ""
},
{
"text": "This does not produce a good clustering",
"image": ""
},
{
"text": "This does require a few seconds",
"image": ""
},
{
"text": "This does require centuries",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Single-linkage clustering works by",
"answers": [
{
"text": "Repeatedly recomputing the centroids of clusters",
"image": ""
},
{
"text": "Repeatedly merging smaller clusters into larger ones",
"image": ""
},
{
"text": "Enumerating all possible clustering of the given points",
"image": ""
},
{
"text": "Enumerating all possible points in a cluster",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "In linear regression, a high value of \ud835\udc79\ud835\udfd0 on the training set suggests:",
"answers": [
{
"text": "A small error of the model on the fitted data",
"image": ""
},
{
"text": "A small error of the model on future predictions",
"image": ""
},
{
"text": "A large error of the model on the fitted data",
"image": ""
},
{
"text": "A large error of the model on future predictions",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "A logistic regression gives the following scores, preceded by the actual label: (Y, 0.85), (Y, 0.75), (N,0.6), (Y,0.5), (N, 0.4), (N, 0.2). For a sensitivity of at least 2/3, the best choice is to predict Y when the score is at least:",
"answers": [
{
"text": "0.9",
"image": ""
},
{
"text": "0.75",
"image": ""
},
{
"text": "0.6",
"image": ""
},
{
"text": "0.45",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Look at the confusion matrix below. What can we say?",
"answers": [
{
"text": "The sensitivity is < 0.80%",
"image": ""
},
{
"text": "There are less positives than negatives",
"image": ""
},
{
"text": "The accuracy is > 90%",
"image": ""
},
{
"text": "The classifier predicts 1 on 60% of the times",
"image": ""
}
],
"correct": 3,
"image": "iVBORw0KGgoAAAANSUhEUgAAANMAAACaCAYAAAAzUH19AAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2NyZWVuc2hvdO8Dvz4AAAAndEVYdENyZWF0aW9uIFRpbWUAbWVyIDE4IGdlbiAyMDIzLCAxMTo0MDoyMUvqiMIAACAASURBVHic7V17dFXVmf+dc595E0KekIRXIIA8g0UQRFsfI2oxjoiu8pCF2kGlSl3tqEidUakUX6AUEWFclYVDZdpi1SntoAWEoaNQGwpGYkKEREgCeZDnvee15w/4Nvuc3Jtgmntz7+X81sq6r51z9rfP/vb37e+1JcYYgw0bNv5hyH3dARs2YgU2M9mw0UuwmcmGjV6CzUw2bPQSbGayYaOXYDOTDRu9BKdhGGCMweFwQNd1AIAkSZAkydTQ+rk3QdZ5xhi/N2MMjDEYhgGHwwHGGGRZhmEYkOXQrQGip4D6o+s6HA4HAHS6fyjHxUZ4Qc+bXjVN48+dMQZd1+F0Onn7Tjyi6zqjxsRQNHH7YqIYhgFJkkxMFKjjoYbIVHRv6huNj43YgshItJgTUzmdToiCh9qb/t8wDGYYBnRdh8vlAgD4fD5IkgRZlvkFQwlRMqmqClmW4XA4OFEAwtoXYmaSSrSwaJoGWZZtyRSjsGolAPgcII3J6/UG1Y4kVVWZOHHtVdeGjeDoapshGYbBAEDXdRiGAZfLha1bt+LQoUP8u1CvvtRBun9aWhrq6upMqwL9Hi4pCYDfl6STLMtITU1FfX19WPpiI7wIpNrTPsnhcOCqq67CXXfdxffQAdU82uh3dHQgKSkJo0aNwoQJEzBs2DA+mcNBCEnHVatW4YknnuAT1qrLhmsSE5OTnnzixAls27YNP/3pT6GqqmkzaiN2IcsyysvL8fnnn+PYsWPw+XzweDyd56GmaUzTNGYYBmtra2OGYbArr7ySVVZWsr5CYWFhn927K1RVVbHp06f3dTds9AEqKyvZlClTmGEYzOfzsQtCyAQnrbxMsOAZhoGqqirk5OSE3BRthaZpaGlpgaIofWbJs4JdkIYnT57kffP7/fB4PH3aLxuhhcgX1dXV0DSty7noFH8U3zudTrjdbr5fCAfo/rIsw+12c2Lou3DtU2hBoXuyC2qey+Uy9Y1ebcQOxGcvMpPopqF2VjjFCSu+UuNADtxQQryX2A96H+79m9gncays/bMRG7A+e3EOBJubBJkaiY0jZYIEk5rhuq91AAN9thFbsPJAsGcc6HvboWTDRi/Btu1GAQKpnYZhQFXVTm0dDgecTmcn/f5S93ZWN4SmaWCMweVy8QgV2jsGUoO7ul5X9Fj7q2ka9zuG2yXSU9jMFMEQ94nWySTLclBrIsWSWa8T6NrB7kmvFGIGnJ/k4j0D7WfF6xqG0SnGUvw/6/+LfbD68CKdkQCbmSIa1tVbjFIpLy/HO++8w73xLpcLPp8P119/PWbMmMGjnMVQMfosWirJw0/fiVHyhmHgd7/7HU6ePIkHH3wQ1dXV+M///E/cfffdGDZsWKd+kuVL0zQA4AwRiFmC0UnMt2vXLnz22Wd44IEHkJaWZnLgRypsZopg0CQ0DAOAeSX/6quv8PTTT3f6nzfffBObN2/GzTffzCWD6F4gtc3v95tSW4gZdF2Hpmk8wmPLli3Yt28ffvjDH+L//u//sGLFCgwaNAjDhw/nap/ItOSLcblc+PLLL1FTU4Pp06fz+7e1tcHlcvEQHTGgmOhkjOHDDz/E2rVr8f3vf58zU6RHnER272x0clPQpHU6nUhNTcXDDz+M66+/HowxnDhxAg899BB+9atf4frrr4fb7e50PU3T4HK5TL8RM5B6JU7a/v37Y+jQofD5fLjttttw5MgRDBo0CKqqwuFwdLoH7an27duHBx54AK+88gqcTif8fj+cTieSkpJM7YmZnE6nSeokJiZiyJAhiIuL47RH+r7JtuZFMKzGAJ
Iyuq7D6/VC0zRMmzYN11xzDWbOnIkFCxZg8uTJqKyshM/nw8svv4zvfe97ePvttzF69Gg89dRTXNqMGTMGo0aNwurVq+FwOHjqy5EjRzB16lSMGTMGb7zxBtxuN1paWuB2u3H48GE8/PDDKCkpgcPhgCzL+OUvf4nRo0dj9OjReOKJJ+ByuXDgwAHcfvvtKC0txZIlS/D000/D4/Hg4MGDKCoqwujRo3H77bejubmZS6jGxkbMmDEDhYWFeOGFFyBJEjo6OqLKKW5LpgiG1RhAqpjD4YDP54Ou6zhz5gy36pWXl+Prr7/G8OHD4fV60dDQgI8//hj79++H3+9HSkoKtm7diqVLl+LcuXMAgGeeeQYOhwM//vGPUVZWhnnz5qGkpAQA8Oijj4IxhvHjx8Pv96OtrQ27d+/GmTNnIMsyNmzYgB/96EdcPSstLYUkSbj66qtRX1+PhIQEVFZWoq6uDkeOHME999yD8vJyyLKM0tJS3HvvvXjnnXfQ3NyMm2++GX/9618BAMuXL4fD4cDgwYPh8/n4eESyVAJsyRTREJlITFCjPYksy1iwYAHcbjfcbjdGjx6NU6dOobi4mFvhHA4HVq9eDU3T8MADD+CFF17A6NGjceLECZw9exZ33nknNmzYgNraWnz++ecoKSnByy+/DE3TcODAAaSlpfHrMMbg8XjgdrvR2tqKrVu3orCwEJWVlVBVFY888ggOHTqEq666Cps2bUJbWxt++9vfYv369XjrrbfQ0tKC3bt3o66uDps2bcKhQ4dw8OBBHD9+HH//+9+xbNkyaJqGr776CmPHju2U5hDpUsqWTBGMQGEtlO1LyMzMRL9+/fjn559/HsXFxfxzUlISpk+fDofDgbq6OjgcDhw4cAD5+fm8TWJiIhoaGlBdXY0JEybgtttuAwBMmDABs2fPxscffwyXy8XVTMYYGhsb0dTUhHvvvReDBw9Ge3s71qxZw69J0oqY8OzZs6itrcW1115rovHYsWNISkrC4MGDMXfuXABAfn4+br31VrzxxhuIj483jUckw2amKICYHEmrNe1x3njjDcyePdvUnoKT/X4/vF4v0tPT+XXOnTuHzMxMzJw5k19zwIABiIuLgyzLOHfuHNrb23mdg5qaGp67Qz4mj8fDrX11dXUAgPj4eFRVVaG6uhrjxo2D2+1GcnIyEhMTubUwKSkJU6dORVpaGi+TMGrUKFRXV0NVVZw9e5bft6mpCQC4mT0aYDNTFMHhcHDTNlnd2tvboSgKN4OTdU2SJMTFxfEiOYZhIDc3F1dccQXKy8uxePFiJCQkwO/3wzAMDBkyhKtsK1euxNy5c1FSUoIPPvgA06ZNg6Zp8Pv9cLlcUFUV2dnZGDJkCDZs2IChQ4ciPT0dL7zwAo4fP46//vWvGDRoELKysrBz505kZWVhypQp+PDDD3HXXXdh1KhRPI1lzJgxSExMxMmTJ/Hqq6/C5/PhxIkT2LJlC7caBgrAjkgwxtiFoirM5/MxxhgrKipiBw4cYIwxput6pySoUEJRFJabm8vEflEiVqCErFBBvCeNwaeffsomTpzIGGN8rEINuremaczv9/NEzvfff58BYBs3bmSMnR836quu60xVVfbggw8yAKy8vJzpus4Mw2ClpaVs3LhxDAD/e/TRR1lHRwdrbGxkS5cu5d9nZWWx/Px8lpOTw+rr69nOnTsZALZ161bGGGOHDh1ihYWFvL3D4WBbtmxhhmGwzz//nKWmpjIA7Nprr2XffPMNu+eee0z3vemmm1hFRQVTVZU9+eST/Pt+/fqxwsJClpSUxA4fPswYY0xV1bA9/0DP/sCBA6yoqIgxFvzZ25IpgsEChPZQbcMJEybgtddew4wZM0zl0cRIhnvuuQeTJk1C//79udpXWFiIbdu2Yf/+/VAUBVlZWbjjjjtgGAa8Xi9WrlyJ8ePHQ9d1XHfddWhoaEBpaSkSExNRUFCAdevWYerUqTAMA5MmTcL27duxd+9eOB
wO5Ofn45/+6Z+gaRpGjhyJzZs3o6amBnl5ecjMzMTGjRtx3XXX8XvNmTMH8fHxYIxh5cqVKCgoQFtbG6ZNmwaXy
},
{
"quest": "A set of observations (\ud835\udc99\ud835\udfcf, \ud835\udc9a\ud835\udfcf), (\ud835\udc99\ud835\udfd0,\ud835\udc9a\ud835\udfd0)\u2026(\ud835\udc99\ud835\udc8f, \ud835\udc9a\ud835\udc8f) obeys the law \ud835\udc9a\ud835\udc8a \u2254 \ud835\udc82\ud835\udc99\ud835\udc8a + \ud835\udc83 + \ud835\udf3a\ud835\udc8a, where \ud835\udf3a\ud835\udc8a is some random noise. The task of estimating a and b from the dataset is called:",
"answers": [
{
"text": "Logistic regression",
"image": ""
},
{
"text": "Linear regression",
"image": ""
},
{
"text": "Linear programming",
"image": ""
},
{
"text": "Logistic programming",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Laplacian smoothing aims at:",
"answers": [
{
"text": "Producing readable plots by using an average window",
"image": ""
},
{
"text": "Reducing the model\u2019s dependence on the noise",
"image": ""
},
{
"text": "Improving the feature quality by removing outliers",
"image": ""
},
{
"text": "Avoid penalizing previously unseen observations",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "A dataset of points (\ud835\udc99\ud835\udfcf, \ud835\udc9a\ud835\udfcf), (\ud835\udc99\ud835\udfd0,\ud835\udc9a\ud835\udfd0)\u2026(\ud835\udc99\ud835\udc8f, \ud835\udc9a\ud835\udc8f) has been generated by the model \ud835\udc9a\ud835\udc8a \u2254 \ud835\udc82\ud835\udc99\ud835\udc8a + \ud835\udc83 + \ud835\udf3a\ud835\udc8a, where \ud835\udf3a\ud835\udc8a is gaussian noise. Linear regression aims at estimating:",
"answers": [
{
"text": "a and b",
"image": ""
},
{
"text": "a and \ud835\udf00",
"image": ""
},
{
"text": "x and b",
"image": ""
},
{
"text": "x and y",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Which one of the following R commands selects only the rows of data where X equals 0?",
"answers": [
{
"text": "select(data, X == 0)",
"image": ""
},
{
"text": "filter(data, X==0)",
"image": ""
},
{
"text": "summarize(data, X==0)",
"image": ""
},
{
"text": "table(data, X == 0)",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "If an algorithm has exponential complexity, then we can assume that:",
"answers": [
{
"text": "In practice it is still fast enough to be useful",
"image": ""
},
{
"text": "It admits a polynomial-time algorithm",
"image": ""
},
{
"text": "It can be solved by finding an optimal clustering",
"image": ""
},
{
"text": "No technological progress will ever make it practical",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "If you have n points, what is the number of clusters that minimizes the within-cluster sum of square?",
"answers": [
{
"text": "1",
"image": ""
},
{
"text": "k",
"image": ""
},
{
"text": "n",
"image": ""
},
{
"text": "We cannot say",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Which regression model has smaller squared error in fitting a real function \ud835\udc87(\ud835\udc99)?",
"answers": [
{
"text": "A simple linear regression",
"image": ""
},
{
"text": "A logistic regression",
"image": ""
},
{
"text": "A polynomial regression of degree 2",
"image": ""
},
{
"text": "A polynomial regression of degree 10",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "A doping screening is tested on a pool of 800 athletes of which 796 are clean. The test is correct in 99% of the cases. What can we say about it?",
"answers": [
{
"text": "It may have missed all of the doped athletes",
"image": ""
},
{
"text": "It may have missed all of the clean athletes",
"image": ""
},
{
"text": "It identified all of the doped athletes",
"image": ""
},
{
"text": "It identified all of the clean atheletes",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "In linear programming, the space of feasible solution is:",
"answers": [
{
"text": "An arbitrary set",
"image": ""
},
{
"text": "A subset of \u211d\u207f",
"image": ""
},
{
"text": "A convex polytope",
"image": ""
},
{
"text": "None of the above",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "The explained variance of a clustering equals:",
"answers": [
{
"text": "Within-cluster SSE divided by total sum of squares",
"image": ""
},
{
"text": "Total sum of squares divided by within-cluster SSE",
"image": ""
},
{
"text": "Within-cluster SSE divided by between-cluster SSE",
"image": ""
},
{
"text": "Total sum of squares divided by between-cluster SSE",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Gradient descent is a technique we have used to:",
"answers": [
{
"text": "Compute the optimal number of clusters",
"image": ""
},
{
"text": "Reduce the noise in the training set",
"image": ""
},
{
"text": "Find the local minima of a function",
"image": ""
},
{
"text": "Estimate the probability of false positive",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Which of these models is probably overfitting?",
"answers": [
{
"text": "R\u00b2 = 0.1 on training, R\u00b2 = 0.1 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.8 on training, R\u00b2 = 0.7 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.7 on training, R\u00b2 = 0.7 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.8 on training, R\u00b2 = 0.1 on test",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Laplacian smoothing aims at:",
"answers": [
{
"text": "Improving the feature quality by removing outliers",
"image": ""
},
{
"text": "Producing readable plots",
"image": ""
},
{
"text": "Reducing the model\u2019s dependence on the noise",
"image": ""
},
{
"text": "Avoid penalizing previously unseen observations",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "To visualize a hierarchical clustering one can use:",
"answers": [
{
"text": "a dendrogram ",
"image": ""
},
{
"text": "a ROC curve",
"image": ""
},
{
"text": "a boxplot",
"image": ""
},
{
"text": "a histogram",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "The goal of linear regression is to:",
"answers": [
{
"text": "bring peace to the world",
"image": ""
},
{
"text": "group similar observations together",
"image": ""
},
{
"text": "learn a linear function from data ",
"image": ""
},
{
"text": "evaluate the amount of noise in the data",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Naive Bayes classifiers work well for:",
"answers": [
{
"text": "linear programming",
"image": ""
},
{
"text": "spam filtering ",
"image": ""
},
{
"text": "k-center clustering",
"image": ""
},
{
"text": "speech recognition",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "The explained variance of a clustering equals:",
"answers": [
{
"text": "(total variance)/(within-cluster variance)",
"image": ""
},
{
"text": "(within-cluster variance)/(between-cluster variance)",
"image": ""
},
{
"text": "(between-cluster variance)/(total variance) ",
"image": ""
},
{
"text": "(within-cluster variance)/(total variance)",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "A binary classifier on 6 points gives the probabilities: 0.9, 0.85, 0.75, 0.5, 0.4, 0.3; the correct labels are 1,1,0,1,0,0. What is the best probability threshold, if we need FPR <= 1/3?",
"answers": [
{
"text": "0.45 ",
"image": ""
},
{
"text": "1.0",
"image": ""
},
{
"text": "0.95",
"image": ""
},
{
"text": "0.25",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Mark the wrong statement about gradient descent:",
"answers": [
{
"text": "batch gradient descent approximates \u25bdf using a mini-batch",
"image": ""
},
{
"text": "stochastic gradient descent approximates \u25bdf with a single example",
"image": ""
},
{
"text": "there is no guarantee to find the global minimum",
"image": ""
},
{
"text": "increasing the learning rate damps oscillations ",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Which task does not require to learn a model?",
"answers": [
{
"text": "Clustering ",
"image": ""
},
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Classification",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "For two sets A, B the probability that the first element in a random permutation of A U B is in A \u2229 B:",
"answers": [
{
"text": "is J(A,B) / |A \u2229 B|",
"image": ""
},
{
"text": "is J(A,B) ",
"image": ""
},
{
"text": "is 1/|A|+1/|B|",
"image": ""
},
{
"text": "is 1/(|A||B|)",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "The R^2 and the p-values of a regression:",
"answers": [
{
"text": "are always equivalent",
"image": ""
},
{
"text": "cannot be both positive",
"image": ""
},
{
"text": "measure different aspects ",
"image": ""
},
{
"text": "are negatively correlated",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "\u2211_{x\u2208X} min_{i=1,\u2026,k} \u2016x \u2212 c_i\u2016\u2082\u00b2 is the objective function of:",
"answers": [
{
"text": "k-squares",
"image": ""
},
{
"text": "k-medians",
"image": ""
},
{
"text": "k-centers",
"image": ""
},
{
"text": "k-means",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Classification accuracy is misleading when:",
"answers": [
{
"text": "the label proportions are unbalanced ",
"image": ""
},
{
"text": "the label proportions are balanced",
"image": ""
},
{
"text": "the dataset is too small",
"image": ""
},
{
"text": "the dataset is too large",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "A binary classifier on 6 points gives the probabilities: 0.85, 0.75, 0.65, 0.5, 0.4, 0.2; the correct labels are 1,1,1,0,0,0. What is the best probability threshold?",
"answers": [
{
"text": "0.3",
"image": ""
},
{
"text": "0.6 ",
"image": ""
},
{
"text": "0.7",
"image": ""
},
{
"text": "0.9",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "An algorithm is considered practical if its running time, as a function of the input size, is:",
"answers": [
{
"text": "exponential",
"image": ""
},
{
"text": "polynomial ",
"image": ""
},
{
"text": "linear",
"image": ""
},
{
"text": "logarithmic",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "The naive Bayes classifier learns:",
"answers": [
{
"text": "the marginal distribution of predictors",
"image": ""
},
{
"text": "the joint distribution of predictors",
"image": ""
},
{
"text": "the joint distribution of predictors and labels ",
"image": ""
},
{
"text": "the marginal distribution of labels",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "k-PCA differs from k-means in that xi is:",
"answers": [
{
"text": "any PCA component",
"image": ""
},
{
"text": "any linear combination of PCA components ",
"image": ""
},
{
"text": "orthogonal to all PCA components",
"image": ""
},
{
"text": "any convex combination of PCA components",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "In logistic regression, the estimated probability of xi being a positive is:",
"answers": [
{
"text": "1/(1+e^{\u2212\u03b8\u1d40x}) ",
"image": ""
},
{
"text": "1/(1+|x|2)",
"image": ""
},
{
"text": "log(\u03b8\u1d40x/(1\u2212\u03b8\u1d40x))",
"image": ""
},
{
"text": "log(xi)",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "A sports betting agency wants to predict whether the Italian national football team will or not qualify for the World Cup championship. They should use:",
"answers": [
{
"text": "Clustering",
"image": ""
},
{
"text": "Logistic regression ",
"image": ""
},
{
"text": "Linear programming",
"image": ""
},
{
"text": "Linear regression",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "A problem X \u2208 NP is said to be NP-complete if:",
"answers": [
{
"text": "X can be reduced to every Y \u2208 NP in polytime",
"image": ""
},
{
"text": "every Y \u2208 NP can be reduced to X in polytime",
"image": ""
},
{
"text": "no Y \u2208 NP can be reduced to X in polytime",
"image": ""
},
{
"text": "none of the others",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "The quadratic loss of linear regression is:",
"answers": [
{
"text": "\u2211_{i=1}^{m}(yi \u2212 \u0177i)\u00b2",
"image": ""
},
{
"text": "\u2211_{i=1}^{m}(xi \u2212 x\u0302i)\u00b2",
"image": ""
},
{
"text": "\u2211_{i=1}^{m}(xi \u2212 yi)\u00b2",
"image": ""
},
{
"text": "\u2211_{i=1}^{m}(yi\u00b2 \u2212 \u0177i\u00b2)\u00b2",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "What is the best threshold value for turning probability scores into binary predictions?",
"answers": [
{
"text": "the one that maximizes sensitivity",
"image": ""
},
{
"text": "it depends on the problem ",
"image": ""
},
{
"text": "the one that maximizes accuracy",
"image": ""
},
{
"text": "the one that maximizes specificity",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "In Huffman coding, the encoder:",
"answers": [
{
"text": "processes whole runs of identical input symbols",
"image": ""
},
{
"text": "works by solving a clustering problem",
"image": ""
},
{
"text": "works by solving a regression problem",
"image": ""
},
{
"text": "processes each input symbol individually ",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "The Maximum Likelihood Estimator for the parameters of a linear model with independent Gaussian noise is:",
"answers": [
{
"text": "the OLS solution vector * ",
"image": ""
},
{
"text": "the square root of the OLS solution *",
"image": ""
},
{
"text": "it depends on the dataset",
"image": ""
},
{
"text": "the vector of the generating process",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Consider the LP: min f(x,y)=x+y; x+y \u2264 \u22122; x,y \u2265 0. The corresponding polytope is:",
"answers": [
{
"text": "degenerate",
"image": ""
},
{
"text": "bounded",
"image": ""
},
{
"text": "unbounded",
"image": ""
},
{
"text": "empty ",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Min-hashing maps each document to:",
"answers": [
{
"text": "one hash signature ",
"image": ""
},
{
"text": "a distance matrix",
"image": ""
},
{
"text": "the set of most frequent terms",
"image": ""
},
{
"text": "a real vector",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "How do you do a linear regression in R?",
"answers": [
{
"text": "predict(y ~ x, data)",
"image": ""
},
{
"text": "lm(y ~ x, data) ",
"image": ""
},
{
"text": "predict(y ~ x, data, family=\"binomial\")",
"image": ""
},
{
"text": "lm(y ~ x, data, family=\"binomial\")",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "How do you measure the significance of an estimate?",
"answers": [
{
"text": "with its magnitude",
"image": ""
},
{
"text": "with R^2",
"image": ""
},
{
"text": "with its p-value",
"image": ""
},
{
"text": "with its sign",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "A manufacturing company wants to find out the relationship between the budget spent in advertising and the total sales of the next semester. They could use:",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "The company wants to predict if a machine will have a technical failure in the next 10 days. This could be done with:",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Moreover, items from the same production line are similar while those from different lines are radically different. You suggest to check by using:",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "What is the true positive rate aka sensitivity?",
"answers": [
{
"text": "the fraction of negatives that are incorrectly classified",
"image": ""
},
{
"text": "the fraction of negatives that are correctly classified",
"image": ""
},
{
"text": "the fraction of positives that are incorrectly classified",
"image": ""
},
{
"text": "the fraction of positives that are correctly classified",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Single-linkage clustering works by:",
"answers": [
{
"text": "repeatedly recomputing the centroids of clusters",
"image": ""
},
{
"text": "repeatedly merging smaller clusters into larger ones",
"image": ""
},
{
"text": "enumerating all possible clustering of the given points",
"image": ""
},
{
"text": "enumerating all possible points in a cluster",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "You have a set of observations (x, y) with x, y \u2208 R. Which one of the following gives the highest R\u00b2?",
"answers": [
{
"text": "Simple linear regression",
"image": ""
},
{
"text": "Polynomial regression of degree 2",
"image": ""
},
{
"text": "Polynomial regression of degree 10",
"image": ""
},
{
"text": "Logistic regression\t",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Which one of the following performances indicates the best model for prediction?",
"answers": [
{
"text": "R\u00b2 = 0.2 on training, R\u00b2 = 0.1 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.7 on training, R\u00b2 = 0.7 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.8 on training, R\u00b2 = 0.1 on test",
"image": ""
},
{
"text": "R\u00b2 = 0.9 on training, R\u00b2 = \u22120.9 on test",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Which task does not require a training set (i.e. a dataset used for learning a model)?",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Classification",
"image": ""
},
{
"text": "Clustering",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "If you have n points, what is the number of clusters that minimizes the within-cluster sum of squares?",
"answers": [
{
"text": "1",
"image": ""
},
{
"text": "k",
"image": ""
},
{
"text": "n",
"image": ""
},
{
"text": "we cannot say",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "In the bias-variance decomposition of the expected squared error, what does a high bias suggest?",
"answers": [
{
"text": "noisy data",
"image": ""
},
{
"text": "overfitting",
"image": ""
},
{
"text": "underfitting",
"image": ""
},
{
"text": "crossfitting",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "A set of observations (x1, y1), (x2, y2), \u2026, (xn, yn) obeys the law yi := a\u00b7xi + b + \u03b5i, where \u03b5i is some random noise. The task of estimating a and b from the dataset is called:",
"answers": [
{
"text": "logistic regression",
"image": ""
},
{
"text": "linear regression",
"image": ""
},
{
"text": "linear programming",
"image": ""
},
{
"text": "logistic programming",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "A regression model (M1) on a training set gives R^2 = 0.5 while a second model (M2) gives R^2 = 0.9. What can we say about predictions on a test set?",
"answers": [
{
"text": "M2 will have error smaller than M1",
"image": ""
},
{
"text": "M2 will have error larger than M1",
"image": ""
},
{
"text": "M2 will have the same error as M1",
"image": ""
},
{
"text": "we cannot say",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "You developed a clinical test to distinguish sick patients from healthy patients. In the population, on average 998 out of 1000 people are healthy, and the test gives an incorrect prediction in 0.5% of the cases. This means the test:",
"answers": [
{
"text": "identifies all the healthy patients",
"image": ""
},
{
"text": "identifies all the sick patients",
"image": ""
},
{
"text": "could miss all the healthy patients",
"image": ""
},
{
"text": "could miss all the sick patients",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "How would you describe overfitting?",
"answers": [
{
"text": "the model is too complex and follows the noise",
"image": ""
},
{
"text": "the model is too complex and discards the noise",
"image": ""
},
{
"text": "the model is too simple and follows the noise",
"image": ""
},
{
"text": "the model is too simple and discards the noise",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "You have to convert the scores given by a logistic regression model into binary predictions. What is the best threshold?",
"answers": [
{
"text": "the one that maximizes accuracy",
"image": ""
},
{
"text": "the one that maximizes TPR",
"image": ""
},
{
"text": "the one that maximizes FPR",
"image": ""
},
{
"text": "it depends on the requirements",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Given a linear regression model, the expected squared error can be usefully decomposed in:",
"answers": [
{
"text": "SSE and SST",
"image": ""
},
{
"text": "underfit, overfit and noise",
"image": ""
},
{
"text": "bias, variance, and error",
"image": ""
},
{
"text": "variance and covariance",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Look at the confusion matrix below (1=positive=true,0=negative=false). What can we say?",
"answers": [
{
"text": "the specificity is 2/3",
"image": ""
},
{
"text": "the sensitivity is 2/3",
"image": ""
},
{
"text": "the accuracy is 2/3",
"image": ""
},
{
"text": "none of the above",
"image": ""
}
],
"correct": 0,
"image": "iVBORw0KGgoAAAANSUhEUgAAALUAAAB3CAYAAABFcwEWAAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2NyZWVuc2hvdO8Dvz4AAAAndEVYdENyZWF0aW9uIFRpbWUAbWVyIDE4IGdlbiAyMDIzLCAxMTo0MDo1N+3IuzAAACAASURBVHic7V15fFTV9f++WZOZLJAEEAgQq1AWI1sogiytwAcEG8EFEEUUQYpWkIIFcfmBpVhiFaxsLYuCiFAFhAY1KcoiVYpSkUCgZQlhD4SEEJLMvDdv3u+PeC5nXt5kn4HE+X4+85mZt9x73n3nnnvuOeeeC60CeL1e8c0//Jimadq2bdu0Hj16aBEREVp0dLTWs2dPLT09vaLiQ9DB6/Vqb731lgZAe+6558q0u/4d1FdwHuPfqqoa8iGHpGmaBj/gpzRNg8lkgtfr9TlmNptRThEh1ACapkFVVUiSBEmSRPubzWZ4vV5IknSjSQwK3G43rFar+M+fW5IkwZsES3mF0Q2SJEFVVdGgwHUmJ4b+qTRwoEHtqWmaaGNqfwCizYnR6yuoHVRVFW3g9XphsZSyLAlX4lGOKklqKkSW5TLX1ucGDjY403q9XiiKArPZ7DMq/lTa2+v1Cqbm2oLZbPaRzj7SuzJMTVKDhj1eWAghBBu8Yxt18ioxtaIosFqt2LNnD7755hvIsiyGxZ+K5Ag0uJRu2LAh8vPzy+jPdE19h14yW61W9O3bF0lJSVBVFRaLRYxmPlK7skwNAMXFxYiIiMCgQYNgsVjQqVMnKIoihocQag5q67y8PKxcuRLTpk3z0Sv5XKY+tzlnS6/XC6vVir179yIyMhIbNmxAcXExwsPDxTW8LSqcKFIv0DQNVqsVmqbB6XTihRdewF133RWAxwkBAGRZxpEjR/D666/faFJuGnz55Zf429/+5jOBNurc5TI13UDqB/9/7tw5oX6QRSSEmoPaODs7G0VFRZBlGYqiiFm/vxdZn0E8duHCBR/1jPNjlSQ1v4G+NU2DxWKBzWbzGRpDqDmovckua7PZAMDHTgvU/zkMZ1iTyQSLxeIjPP0xNABUy4yhL6S+N/CNgl6Y6H/XZ/BnL68djNqjXEkdws0HUgMJhs4Hg2MVndMf56NyZcrk11Z03t/1/p6lqggxdR0Ct3x4vd4yTFje5ImXYXQN/0/ueaqLT8zKo43oqOic0X8yW+p9INUJwQgxdR2BXsf0dw2/jsCZRz/B4tdwZiP9Vc/4/kD3ezyeMoaD8jqPvr6QpP4JgTNjTk4OMjMzYbFYYDKZoKoqwsPD0aVLF+FK54FnxBgkfQl0LwAfRszJyUFWVhbatWsHl8uFEydOIDExEQ6HA8B1xqQ6LBaLGDn4bx4ARzR4PB4x6fV4PLBYLCgoKEBmZiYSEhJwyy23+IwO1ZHUIX93HQEfoj///HPcc8896NOnD3r16oW+ffviF7/4Bd566y0UFhbC6/XC4/EI5lIURZTjdrvh9XqhqqqP40yWZbhcLqiqih07diA5ORmZmZlYt24devbsiSNHjggmpXgU6iSKokCSJJSUlGDfvn3weDyC6VVVhSzL8Hg8wtehqio8Hg+AUnt8VlYWhg0bhs2bN0OSJHE/PXdVEZLUdQgkrR0OB2699VaMGzcOt956KyRJwsaNG5GSkoJf/epXSEpK8pG8FotFSEhuGiRmlCQJdrtdHHc4HGjUqBHCw8MxfPhwdOjQAW3bthWOOL364/V6UVJSgsmTJ6NVq1bo2rWrsC3rVRGKIeJqhd1uR7NmzRAdHe1XhaoKQpK6DoEP+1FRUXj88cfxyCOPYOTIkZg2bRquXbuGw4cP44cffsAjjzyChQsX4p577sHzzz8Pi8WCpUuXok2bNmjdujVWr14Nq9UKt9sNSZLwl7/8Be3bt0dycjJycnIAACUlJTh8+DBWrFiBvLw80QHmzJmDdu3aITExEZs2bYLZbMa4ce
OwcuVKLFy4EJMmTYLX68XZs2eRnJyM1q1bY/To0bh69ap4hk2bNqFDhw7o1q0bvv32W7jdbh/HCu9wVUVIUtch0AtWFAVutxsHDhwQKsCWLVtgMplw2223wePxIDU1FRs3bkR0dDR69uyJ1atXY9q0aWjZsiUURcGUKVPgcDjw0EMPYdGiRXj++ecRHR2N/Px8bN++HU6nEw0bNsTevXuxadMmvPjii2jRogVmzpyJlJQUxMTEQFEUPProo1i+fDlkWYbdbkdRURGuXLmCs2fP4sknn8Thw4cRFRWFL7/8EuPGjcOaNWuwa9cujB07Fi6XC1FRUZg8ebIIlgNKdX2z2VztYLmQpK6DCA8PR1ZWFoYMGYKEhAS0bNkSf/zjH/Hoo4+iXbt2Qv9+6qmncPHiRcyaNQurV69Gq1atsHTpUqxatQqdO3fGRx99BJfLhY0bNyIxMRHnzp3D+fPnMX78eKGHR0ZGIiYmBmFhYbh48SJWrlyJRx55BJcuXUJeXh4efvhhXL16FYsXL8btt9+OF198EatXr8bXX3+Nf/3rX5g8eTLWrl2L5557Dp9++im+++47pKenw+v1IjMzEzk5OZgzZ04ZVYMi86oTghGS1HUE/KXLsozY2FgMHDgQ8fHxUBQFLVu2xIQJE2AymVBcXIxmzZqhX79+UFUV+fn5kGUZmZmZ6Nu3ryinZ8+eyMnJQUFBAQYNGoSwsDDIsoxhw4YhLS0N+fn5CAsLg9vths1mQ15eHrxeL379619DlmWYTCasWrUKAHDs2DF4PB7k5+dDURRcvHgRiqJg5syZmDlzpqjzzJkzOHv2LHr16oUWLVpAlmUMHjwYy5cvR3FxsXhWUrNC6sdPAMTccXFxePXVV5GQkCDOkdVAkiSEhYUhLCwMJpMJ4eHhsNlsaN26NRYtWgSLxQJZlhEVFYVGjRohKioKX3zxBSRJgs1mw9atW3H58mVERUUJhna5XIiJiYEkSdi5cydGjBgBr9eLSZMm4Wc/+xlGjhyJhg0bIiIiAlarFQ0aNIDZbMbcuXPRvXt3EZjVuXNn7Nq1C+np6cjJyUHz5s2xa9cuHD9+HE6n02eiWF2EmLqOgEsuVVVx5coVnD9/Hk2bNi2zvElRFFy9elUs2nU6nRgxYgQmT56Ml156CUCpXfqZZ55Bjx49kJycjClTpiA+Ph4OhwNZWVmwWCxQFAWKoqC4uBiKoqBx48YYO3Ys3njjDXz22Wcwm804fvw45syZg4YNGyIsLAwpKSm4cOECpk6dip49e2LZsmXYuHEjVFVF+/bt0aNHDwwZMgTvvfceOnXqhAYNGuDixYsoLCwU9ALXOy/XrSuLkE5dh0BmroiICNxxxx2Ijo6G3W6H1WoVjhhJkhAVFYV27drB4XAIl/e4cePwf//3f8jOzkZ2djY6dOiAgQMHQtM0TJo0CXPnzgUANG3aFMuWLUO/fv1gtVoRExOD9u3bw+l0AgDmzp2LF154AW63Gy6XC3PnzsVLL70Ei8WCfv36oUmTJsjNzUXjxo2xZs0axMbG4sSJEygsLMRjjz2GyMhI3HvvvViyZAkcDgdMJhOWLFmC5ORk2Gy2MouKq8rQAACtAvDcCi6XS9M0TRs2bJi2efNmTdM0zePxVFRECFUAtXdWVpaWlJSkaZqmybIs8lyoqlrmelVVNY/Ho3m93jLvg/Jk+HtPdF9laVMUxfC4ni6Px2NYp6Io5fIMPQs9M/3XNE1bv369Nnz4cE3TNMGLRgipHzcxNN0aUZJgXHqRysFjJyidBeDrwiYvI6kxNputjFqjqqrw+lksFh+zGrngZVkWtOmdOXQt0UR2cB4qSl5IjakY/D6NuchNJpPwPurbRtOqsUgghBsH/qL4Sg/y6GnMQsAj92jFP2dmKoPupbL0cRkWiwVWq1VMOLUfYzmoDL7AleqmOjXtemIjqos6CL+e3OCckXnnBeCTCoJbQOgYbxNOPyHE1DcpeMAQMQ8xij4oSR+9Rw
zDGVAv0fSJiDhjEVPpM3KRhLVarWW8ffrFwJweApXLl6bpn1lfHnB9ORc/RpLa6N4QU9+k0EtqYrCKVoJwlMc05
},
{
"quest": "You are using k-means, and notice that different executions give different results. This happens since:",
"answers": [
{
"text": "k-means is randomized",
"image": ""
},
{
"text": "clustering can take exponential time",
"image": ""
},
{
"text": "this is unsupervised learning",
"image": ""
},
{
"text": "you are using the wrong value for k",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "You have 6 observations; their class (Positive or Negative) and the score given by a logistic regression are as follows: (P,0.9), (P,0.85), (N,0.75), (P,0.5), (N,0.4), (N,0.3). If you do not want the false positive rate of your classier to exceed 1/3, the best choice is to predict \u201cY\" whenever the score is at least:",
"answers": [
{
"text": "1.2",
"image": ""
},
{
"text": "1.0",
"image": ""
},
{
"text": "0.45",
"image": ""
},
{
"text": "0.25",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Logistic regression finds the parameters that maximize: ",
"answers": [
{
"text": "the mean square error of the input data",
"image": ""
},
{
"text": "the skewness of the input data",
"image": ""
},
{
"text": "the inter-cluster distance of the input data",
"image": ""
},
{
"text": "the log-likelihood of the input data",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "What does the Bayesian Optimal Classier need to know in order to work?",
"answers": [
{
"text": "the marginal distribution of each variable",
"image": ""
},
{
"text": "the marginal distribution of the label",
"image": ""
},
{
"text": "the joint distribution of variables and label",
"image": ""
},
{
"text": "the joint distribution of the variables",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Which one of the following classifiers has the best performance?",
"answers": [
{
"text": "TPR=0.2, FPR=0.2",
"image": ""
},
{
"text": "TPR=0.2, FPR=0.8",
"image": ""
},
{
"text": "TPR=0.8, FPR=0.2",
"image": ""
},
{
"text": "TPR=0.8, FPR=0.8",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Your boss calls you to tell your new regression model seems completely useless for prediction, in spite of the high R^2 of the t. You realize that probably there is:",
"answers": [
{
"text": "underfitting",
"image": ""
},
{
"text": "overfitting",
"image": ""
},
{
"text": "correlation",
"image": ""
},
{
"text": "no tomorrow",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "From the confusion matrix below, what can we say?",
"answers": [
{
"text": "R^2 = 0:67",
"image": ""
},
{
"text": "accuracy = 80%",
"image": ""
},
{
"text": "all good things must come to an end",
"image": ""
},
{
"text": "sensitivity < 80%",
"image": ""
}
],
"correct": 1,
"image": "iVBORw0KGgoAAAANSUhEUgAAALUAAAB3CAYAAABFcwEWAAAABHNCSVQICAgIfAhkiAAAABl0RVh0U29mdHdhcmUAZ25vbWUtc2NyZWVuc2hvdO8Dvz4AAAAndEVYdENyZWF0aW9uIFRpbWUAbWVyIDE4IGdlbiAyMDIzLCAxMTo0MToyNfQ7K74AACAASURBVHic7V1pdBRVFv6qu6u7s9IJgSQQwIAsYQAjbixzjAgGQSEgMqBiWBURUAR3nTkqjAIyo44K4mhEzeC4gIoIiLgzDkeEMGpEnEASlkTI0iHpJL1Udc2PnPt4Xekk3Ul1OmTqO6dPL1X93qtXX91337333ScoiqJAh45OBEO4G6BDh9bQSa2j00EntY5OB53UOjoddFLr6HQwBXqioigQBIG9y7IMWZZD2TYd/8cwGo0wGo2NeBcIBN2kp6OzISBJzfPe7XbDYrEgNzcXb7zxBkRRhMvlClkD/99gMpkgSRIMBgO8Xm+4m9PusFgs8Hg8yM7OxqxZs+ByuWA2m9nxQKR1QJKaTpEkCZIkISIiAvPmzUPv3r1x0003we12w2g0Bjw86PAFDa2VlZXIzs7G9u3b4XQ6YTabYTAYoCgKFEVhnztjPyuKAlmWYTab8dZbb+H48ePIyclBfX09TCYTTKYG+RvItQckqQVBgNfrhSiKkCQJQMMTNXDgQAwcOLANl6KDR21tLXr27Im0tLRwNyWsGDhwIE6fPs2+i6IIr9cLgyEwu0bA6gcRmy+4vr4eiqL4DBGdUYqEGtS/dXV1cLvdUBSFSWrqc+rXzti/pAmQaltfX8+OkRoWzGQxYOsHgSweXq8XJpMJgiB0+k5vDwiC4POiPqX+VBSFWQPofLVlQH1M/Zm+U3mBtMnfeYHU5a9edTvVbaJrNplMbD4hyzJEUWyxrTyCIjX/pBgMBt2kFwLwQoH0aK/X2yyh6VyesP4+03d1PWq0ZEYLpK6m6vVXpprksiwzjSBYcx4QhE5NkoJXP/jPTT3VOoIDTzpeavMqIHBuWObvAT+hpCGb7guVS+fzv9F3XmBJksQkplq6U/n86OHvwaO28iMOAB+rjj/SqjnG26sDQUCad1NPLn+RnXVW3t7g1TjqV/ouCAJzStB3r9cLSZIYURRFYd+JbDx5vV4vPB6Pz/lULtUpyzL7L3BO5eSlLV82XxbVIUmSz3f6DYDPdUmSxJx5hEDVlqYQlKRuTj9rzTChozHUxKEb7na7IcuyD9mtViuMRiP7L0luMo8R4Uwmk0959B+StHV1dQDO2YhJr5UkCaIoNpqs0WhgNBpZm3hi+lMdqD0Oh4OVT2VQnfz/+P4IllsBx340NxHUJ4naQd2XkiTBaDRi7dq1iI2NRUxMDKKiohAdHY0//OEPKCsrg6IozGridruZ34Ckdl1dHTweDyM8SWuPxwNJknDfffdh9uzZcDgcGDRoEBYtWgSj0QhRFOHxeOB2u9m5/D0+ceIEk75utxuSJLGHj1eTnE4ncygtXrwYS5YsgdlsZiMNb4duqT8CQdDWDx2hBX8TSToJggCXy4U+ffpg0qRJEEURDocDubm5WLduHdasWcOkL5HDH1k8Hg+TwBaLhf1uNptRW1sLi8WCVatWITk5mUlg3psHNKgiRqMRq1evhsFgwP3339+oPKDhYaQyIiMjff7Pqyv8SKPVnEwndQeDv8kbqRPp6el4/vnn2XGHw4G9e/eipqYGL7zwAuLj43HixAmUlpbiueeeQ15eHlauXAkAWLJkCSZPnszI/pe//AWffvoprr/+eiiKAovFAkVRcOrUKWZCMxgM+O677/Dkk0/C5XLh1ltvxc0334zHH38cjz32GC644AK43W48+uijKCwsxAMPPICqqirMmDED8+fPZ9J51apV+OabbzB9+nRERETA6XRCURQ2EeVVJC2gk7qDgdepCSStq6urUVhYCIvFArvdjh9++AFpaWkwmUzYvXs3vvzyS9hsNo
wcORLffvstbr31VsiyDJPJhMWLF0MURUyYMIGR0mKxIC8vDzU1NZg4cSKcTic2btyIMWPGYMaMGThw4ACmT5+OEydOIDIyErt27YLdbsfJkychiiJOnDiBX375BceOHcPs2bPx888/w2w244cffoAkSVi4cCFWrFiBv/71r7BarTh06BCqq6tx0003QRAEpiYRobVSX/V46g4KGprpRouiiC+++AJ9+/ZFz549MWTIEFRUVGD+/PmwWCyIjIzEhRdeiMLCQuzYsQM7duyALMvIycnB+++/j379+uGdd96Bw+HAW2+9hWnTpsHpdOLw4cMYMGAA3G43RFFEQkICbDYbFEXBxo0bIUkSzpw5A4fDgYceegjl5eVYt24dLr74Ytx3333Izc3F9u3bsX//fqxZswZbtmzB73//e2zYsAGFhYXYsmULbr/9dtTX12P//v1ISUlhujmZiHk1SwvokrqDQa1T86avtLQ0zJw5E1arFZIkYfz48bjkkkvgdDphMplw9dVXo0uXLpBlGXa7HRUVFcjKymJlR0VFoaSkBB6PB9dddx0URUF8fDwmTJiAgoICuN1uAGC68OnTpzFmzBgkJCRAlmU8+eSTAIDKykq4XC6mRtjtdrhcLtx2222srgsvvBBHjhyB0WhEZmYmFEVB7969MX78eFRXV7OHlqwqpIZoAZ3UHQxqzxtP7gEDBuBPf/qTz/k0ITMYDOjSpQsz69lsNsTFxeHtt99Gly5d4HA4YLPZkJycDLPZjK1bt2Lu3Lk4deoUdu7cif79+zMTmyiKUBQFvXr1wttvv42SkhL06NEDy5cvR11dHVatWoWePXvCYrFAEATEx8cjIiICGzduxKBBg+BwOBAREYEePXrA4/Fgx44dmDZtGo4dO4YdO3YgIyPDxwTIexC1gK5+dDD4cyGT9aO0tBQ1NTVwOp1wOp3MTGexWFBWVoaKigpGjrlz58JsNiMrKwtjx47FggULcOTIEcTExGDOnDnYvn07BEHAkCFD8NNPP+Hs2bMQBAHl5eWoqKiAIAhYsGABunbtip49e8JkMuGZZ55Bz549ERcXB6PRiDVr1mD69OkYP348Ro8ejTvuuAPjxo3DLbfcgm+++Qa9e/fGrFmzkJOTA0EQMGLECBQWFsLhcDDHDU0YdfWjE0Mdm0FD8xVXXIEePXrAYrFAFEUf6SbLMqZNm4a4uDgmAdPT0/Hhhx/i6aefhtfrxdVXX43p06dDURQ88MADiI2Nxeeff44JEyYgIiKCWT1mzpyJgQMHwuv1Ij09Hbm5uXjhhRdQW1uL0aNHY9myZZBlGfPnz0d0dDT69++P/v37IycnB48//jjKy8sxbNgwLFq0CIqiYOXKlbDZbPj+++8xefJkSJKE2tpaeDweplNrvRgi6OVcbrcbZrMZixYtwogRIzB79mwWJ6CjdSBpXF5ejuuvvx779u1j5jBSP4jAPAFILyXbMR2nB4G3AQPnHDl8eU2BHhq19ORNcPy56rpICqtt0XxbeLMelS2KIl5//XXs27cPGzZsYHwLBjoTOxj8hR5QPIaaTHSewWBgrmYiEP2H/503nZEHkiec0WhklgneiUMEFAQBoigyBw4JM5rwUV18LAkAFmuirosPlONjSNoKndQdFOowTprE8eClqL+YY4PBAKvVysrgyyTpp47pIf2Wn6iqJSVJWP4hEQSBOXD4dvHRhv6uS12XFtBJ3UHBE00d+tnUef7QUhkt1eOvfPV/Wjpf3Q6115TedY/i/wGaior0d14wZbb0W0vf1b8Fcn4gx3VJ3Unh9XpZVJxaZejsoDhwivwDml8x0xR0UncgeL1eWCwWmM3moGf8nQF0zd27d2eOGXUsfyDE1kndgWC1WnHs2DEsXrw4qJUeWoEIExcXh7NnzzYiVqhB0YI//vgjhg8fDuCciVKX1OcpvF4voqKicNlll7HYZ3+TslCoI/wqmeXLl+Ohhx5CQkICO9YeKpDX64XZbEZNTQ08Hg
+rm/pCl9TnIdxuN7p37445c+aEtR2bN2/GokWLEBUVFZb6KcyVEKxjTyd1BwI5UWhZFC+dQrm4mXfImM1mOBwOl
},
{
"quest": "Consider the LP: max f(x,y)=x+3y; x10; y3. The value of the optimal solution is:",
"answers": [
{
"text": "19",
"image": ""
},
{
"text": "23",
"image": ""
},
{
"text": "12",
"image": ""
},
{
"text": "40",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Your friend proposes a novel clustering algorithm that tries all possible clusterings of the data. This algorithm:",
"answers": [
{
"text": "has exponential complexity",
"image": ""
},
{
"text": "is efficient but gives poor clusterings",
"image": ""
},
{
"text": "has polynomial complexity",
"image": ""
},
{
"text": "is efficient and gives good clusterings",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "In a binary classier build by thresholding the scores of a logistic regression model, the positive observations:",
"answers": [
{
"text": "have a score strictly higher than all the negatives",
"image": ""
},
{
"text": "have higher density than the negatives",
"image": ""
},
{
"text": "are at least as many as the negatives",
"image": ""
},
{
"text": "are separated from the negatives by a hyperplane",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "The class NP contains all problems whose solution:",
"answers": [
{
"text": "can be verified in polytime",
"image": ""
},
{
"text": "requires exponential time",
"image": ""
},
{
"text": "none of the others",
"image": ""
},
{
"text": "can be computed in polytime",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "Lloyd's algorithm for k-means works by:",
"answers": [
{
"text": "evaluating all possible points in a cluster",
"image": ""
},
{
"text": "evaluating all possible clustering of the points",
"image": ""
},
{
"text": "repeatedly merging clusters",
"image": ""
},
{
"text": "repeatedly adjusting the centroids of clusters",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "You want to learn how your revenue depends on parameters such as number of working hours, etc. You could use:",
"answers": [
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "In least squares, R^2 can be seen as:",
"answers": [
{
"text": "the norm of the parameter vector",
"image": ""
},
{
"text": "none of the others",
"image": ""
},
{
"text": "the gain over a baseline model",
"image": ""
},
{
"text": "the inverse of the SSE",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "The ROC curve shows:",
"answers": [
{
"text": "specificity versus sensitivity",
"image": ""
},
{
"text": "specificity versus FPR",
"image": ""
},
{
"text": "TPR versus sensitivity",
"image": ""
},
{
"text": "TPR versus FPR",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Can feature scaling improve the model fitted via least squares?",
"answers": [
{
"text": "yes, in terms of p-values",
"image": ""
},
{
"text": "no",
"image": ""
},
{
"text": "yes, in terms of interpretability",
"image": ""
},
{
"text": "yes, in terms of R2",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Can a clustering on n points achieve 0 within-cluster sum of squares?",
"answers": [
{
"text": "yes, with 1 cluster",
"image": ""
},
{
"text": "yes, with k clusters",
"image": ""
},
{
"text": "yes, with n clusters",
"image": ""
},
{
"text": "no, never",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "In linear regression, if the p-value for the estimate i is small enough, then we:",
"answers": [
{
"text": "accept the null hypothesis i = 0",
"image": ""
},
{
"text": "reject the null hypothesis i = 0",
"image": ""
},
{
"text": "use a model with more features",
"image": ""
},
{
"text": "use a model with more parameters",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Texts written in the same language have a similar letter frequency distribution. You can check this fact by:",
"answers": [
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
},
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Texts written in the same language have a similar letter frequency distribution. You can check this fact by:",
"answers": [
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
},
{
"text": "Linear Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Two classifiers, C1 and C2, have accuracy respectively 98\\\\% \\\\and 95%. Which one is the best?",
"answers": [
{
"text": "C1",
"image": ""
},
{
"text": "They are equivalent",
"image": ""
},
{
"text": "We cannot say",
"image": ""
},
{
"text": "C2",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Correlation clustering asks to minimize:",
"answers": [
{
"text": "The root mean squared error",
"image": ""
},
{
"text": "The number of disagreements",
"image": ""
},
{
"text": "The intra-cluster variance",
"image": ""
},
{
"text": "The running time",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "If you increase the complexity of your linear regression model, eventually the SSE on the test set will:",
"answers": [
{
"text": "Approach zero",
"image": ""
},
{
"text": "Cancel the training error",
"image": ""
},
{
"text": "Exceed the training error",
"image": ""
},
{
"text": "Become negative",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Classification accuracy is misleading when:",
"answers": [
{
"text": "The label proportions are unbalanced",
"image": ""
},
{
"text": "The dataset is too small",
"image": ""
},
{
"text": "The label proprtions are balanced",
"image": ""
},
{
"text": "The dataset is too large",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "The worst-case running time of the k-means algorithm on the n points is:",
"answers": [
{
"text": "Polynomial in n",
"image": ""
},
{
"text": "Superpolynomial in n",
"image": ""
},
{
"text": "Linear in n",
"image": ""
},
{
"text": "Unbounded in n",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "In linear regression, the expected squared error s the sum of:",
"answers": [
{
"text": "The good the bad and the ugly",
"image": ""
},
{
"text": "Squared bias and variance and noise",
"image": ""
},
{
"text": "Underfit and overfit the noise",
"image": ""
},
{
"text": "Variance and covariance and noise",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Your friend proposes an innovative clustering algorithm that enumerates all possible clusterings of the points. This algorithm:",
"answers": [
{
"text": "Has exponential complexity",
"image": ""
},
{
"text": "Has polynomial complexity",
"image": ""
},
{
"text": "Is efficient but gives poor clustering",
"image": ""
},
{
"text": "Is efficient and gives good clusterings",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "A high R^2 on a given dataset means:",
"answers": [
{
"text": "A large error on new data",
"image": ""
},
{
"text": "A large error on that data",
"image": ""
},
{
"text": "A small error on that data",
"image": ""
},
{
"text": "A small error on new data",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Multicollinearity arises if the features vectors are:",
"answers": [
{
"text": "absolutely orthonogal",
"image": ""
},
{
"text": "linearly dependent",
"image": ""
},
{
"text": "linearly independent",
"image": ""
},
{
"text": "positive semidefinite",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "A logistic regression model learns:",
"answers": [
{
"text": "The conditional distribution of predictors",
"image": ""
},
{
"text": "The conditional distribution of labels",
"image": ""
},
{
"text": "The marginal distribution of predictors",
"image": ""
},
{
"text": "The marginal distribution of labels",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Consider the LP: min f(x,y) = x + y; x+y >= 2; x, y <= 0. The corresponding polytope is:",
"answers": [
{
"text": "Bounded",
"image": ""
},
{
"text": "empty",
"image": ""
},
{
"text": "Degenerate",
"image": ""
},
{
"text": "Unbounded",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "To measure the efficiency of algorithms we use:",
"answers": [
{
"text": "convex analysis",
"image": ""
},
{
"text": "asymptotic analysis",
"image": ""
},
{
"text": "squared analysis",
"image": ""
},
{
"text": "clinical analysis",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "Everything else being equal, what does suggest a good clustering?",
"answers": [
{
"text": "a high p-value",
"image": ""
},
{
"text": "a low within-cluster sum of squares",
"image": ""
},
{
"text": "a large number of observations",
"image": ""
},
{
"text": "a small number of clusters",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "The set cover problem:",
"answers": [
{
"text": "Can be solved in constant time",
"image": ""
},
{
"text": "is part of linear programming",
"image": ""
},
{
"text": "is NP-Complete",
"image": ""
},
{
"text": "is P-Complete",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "A company must allocate 5M\u20ac so that each department receives a minimum amount. You can use:",
"answers": [
{
"text": "Linear regression",
"image": ""
},
{
"text": "Logistic Regression",
"image": ""
},
{
"text": "Clustering",
"image": ""
},
{
"text": "Linear Programming",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "With hierarchical clustering on n points you can get:",
"answers": [
{
"text": "Between 1 and n clusters",
"image": ""
},
{
"text": "No satisfaction",
"image": ""
},
{
"text": "Up to 2^n clusters",
"image": ""
},
{
"text": "At most log(n) clusters",
"image": ""
}
],
"correct": 0,
"image": ""
},
{
"quest": "The standard assumption of linear regression is that the noise across the observations:",
"answers": [
{
"text": "is fast and furios",
"image": ""
},
{
"text": "is always bounded",
"image": ""
},
{
"text": "is Gaussian and correlated",
"image": ""
},
{
"text": "is Gaussian and independent",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "The ROC curve is used to measure:",
"answers": [
{
"text": "The amount of overfitting and underfitting",
"image": ""
},
{
"text": "The noise in the training dataset",
"image": ""
},
{
"text": "The performance of binary classifiers",
"image": ""
},
{
"text": "The MSE obtained by a linear regression\"",
"image": ""
}
],
"correct": 2,
"image": ""
},
{
"quest": "Geometrically, each constraint of a linear program corresponds to:",
"answers": [
{
"text": "a vector",
"image": ""
},
{
"text": "A double-space",
"image": ""
},
{
"text": "a cone",
"image": ""
},
{
"text": "a half-space",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Many well-known clustering problems are:",
"answers": [
{
"text": "impossible to solve",
"image": ""
},
{
"text": "NP-hard",
"image": ""
},
{
"text": "easy to solve",
"image": ""
},
{
"text": "infeasible\"",
"image": ""
}
],
"correct": 1,
"image": ""
},
{
"quest": "A polytope is:",
"answers": [
{
"text": "The difference of half-spaces",
"image": ""
},
{
"text": "the greatest gift of all",
"image": ""
},
{
"text": "the union of half-spaces",
"image": ""
},
{
"text": "The intersection of half spaces",
"image": ""
}
],
"correct": 3,
"image": ""
},
{
"quest": "Everything else being equal. What does suggest good clustering?",
"answers": [
{
"text": "Few clusters",
"image": ""
},
{
"text": "low within-cluster sum of squares",
"image": ""
},
{
"text": "high p-value",
"image": ""
},
{
"text": "large number of points",
"image": ""
}
],
"correct": 0,
"image": ""
}
]