US Obesity Rate Patterns
Fall 2024 Data Science Project
Isaac Plowman, Praharsh Nanduri, Justin Nguyen, Kevin Ferry, Charles Kim
Contributions:
A: Project idea B: Dataset Curation and Preprocessing C: Data Exploration and Summary Statistics D: ML Algorithm Design/Development E: ML Algorithm Training and Test Data Analysis F: Visualization, Result Analysis, Conclusion G: Final Tutorial Report Creation H: Additional (not listed above)
Kevin: Project Idea ~ Helped look for kaggle datasets. Dataset curation and Preprocessing ~ Helped clean data and look at ways to deal with missing values. ML Algorithm Design/Development ~ Helped develop ideas for ML algorithm design and some of the early questions we aim to answer using ML. Also worked on encoding some of the categorical variables. Visualization, Result Analysis, Conclusion ~ Worked on writing what the reader will experience when reading our tutorial and what they will be able to learn from utilizing our code.
Charles: Project Idea - worked with the group to search for Kaggle datasets. ML Algorithm and Training Test - worked on developing the initial model design and idea. Summary Statistics - extended the existing ANOVA test with a post-hoc test to pinpoint which income bracket had a significant impact on obesity rate, and worked on the linear regression model. After the initial findings, he came up with the idea to test several different models to see which one works best, as well as explore why some models didn't work well for our given data set. Result Analysis, Conclusion, Tutorial - worked on the final insights and explaining, given all of our models, why random forest worked the best.
Justin: Data Exploration and Summary Statistics - Helped look for possible datasets to use. Data Exploration and Summary Statistics - Implemented linear regression hypothesis test. ML Alg Training - Added different models to the model dictionary. Visualization, Result Analysis - Added labels and legend to various graphs, wrote analysis and explanation of model results.
Praharsh: Project Idea - Helped look for kaggle datasets. Data Exploration and Summary Statistics ~ Implemented a Chi-Squared test and a ANOVA test and developed descriptions/tutorials for each test. ML Algorithm and Training Test Data Analysis - Created the dictionary for models and chose/imported the corresponding models. Visualization, Result Analysis, Conclusion & Tutorial ~ Helped curate descriptions regarding visualizations of each plot and also curated descriptions regarding the choice behind Linear Regression and why we found Random Forest as our most accurate model.
Isaac: Project Idea - researched a topic and found the Nutrition_Activity_Obesity CSV from data.gov. Wrote most of the introduction paragraph. Dataset Curation - Cleaned the dataset by removing irrelevant columns and showing basic dataset info. ML Algorithm Design/Development - did feature engineering by changing the categories to numerical values and specified the features and target variables. ML Training - Helped implement and test the different models. Insight and Conclusion - Helped write the descriptions of the steps and contributed to writing the conclusion.
Overall, the team felt everyone contributed equally.
Introduction
Our topic is focused on obesity rates and patterns in the US. Specifically, our project involves looking at numerous factors like physical activity, geographic location, income, and education for correlation with obesity. We can also use the chosen data to predict the percentage for a given measure (e.g., the percentage of adults who engage in no leisure-time physical activity) based on the features of a geographical area.
Unfortunately, obesity is a highly prevalent disease that negatively affects people around the world. Data from 2023 shows that in 23 U.S. states, one in five adults has obesity (CDC data). With growing fast-food and snacking markets and rising prices for healthy foods, people are increasingly pushed toward cheaper, less healthy food alternatives. Additionally, obesity is linked to many chronic conditions like diabetes, heart disease, and certain cancers. We examine these datasets to help predict healthcare needs and to assist in creating nutritional guidelines and policies that mitigate the growth of obesity. We can also locate disparities in obesity rates and the connections between obesity and certain economic, racial, and geographical groups. These datasets allow us to examine environmental influences on obesity and understand the connection between fast food availability and health outcomes. Understanding this information identifies populations that are at risk, which helps determine how many resources to allocate and where to allocate them.
Data Curation
For our project, we analyzed three different datasets from two different websites. The links to where we gathered data include the following:
Nutrition_Activity_Obesity From Data.gov:
FastFoodRestaurants and Datafiniti_Fast_Food_Restaurants From Kaggle.com:
"Nutrition_Activity_Obesity" refers to the dataset that includes different topics or questions and their corresponding percentage. The "FastFoodRestaurants" datasets include the locations of fast food restaurants across the US. These datasets will be used for analysis and pattern prediction, but before that we must process and clean their data.
First we must create the data frames using the csv files. Down below we display the dataframes.
import pandas as pd
import matplotlib.pyplot as plt
# Load the three datasets into dataframes and display them
ffr_df = pd.read_csv('FastFoodRestaurants.csv')
obesity_df = pd.read_csv('Nutrition_Activity_Obesity.csv')
Datafini_ffr_df = pd.read_csv('Datafiniti_Fast_Food_Restaurants.csv')
display(obesity_df)
display(ffr_df)
display(Datafini_ffr_df)
YearStart | YearEnd | LocationAbbr | LocationDesc | Datasource | Class | Topic | Question | Data_Value_Unit | Data_Value_Type | ... | GeoLocation | ClassID | TopicID | QuestionID | DataValueTypeID | LocationID | StratificationCategory1 | Stratification1 | StratificationCategoryId1 | StratificationID1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020 | 2020 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | NaN | Value | ... | NaN | PA | PA1 | Q047 | VALUE | 59 | Race/Ethnicity | Hispanic | RACE | RACEHIS |
1 | 2014 | 2014 | GU | Guam | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | (13.444304, 144.793731) | OWS | OWS1 | Q036 | VALUE | 66 | Education | High school graduate | EDU | EDUHSGRAD |
2 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | NaN | OWS | OWS1 | Q036 | VALUE | 59 | Income | $50,000 - $74,999 | INC | INC5075 |
3 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | NaN | OWS | OWS1 | Q037 | VALUE | 59 | Income | Data not reported | INC | INCNR |
4 | 2015 | 2015 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who achieve at least 300 min... | NaN | Value | ... | NaN | PA | PA1 | Q045 | VALUE | 59 | Income | Less than $15,000 | INC | INCLESS15 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
93244 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q037 | VALUE | 56 | Income | Less than $15,000 | INC | INCLESS15 |
93245 | 2022 | 2022 | WY | Wyoming | BRFSS | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | NaN | Value | ... | (43.23554134300048, -108.10983035299967) | PA | PA1 | Q047 | VALUE | 56 | Education | Less than high school | EDU | EDUHS |
93246 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q036 | VALUE | 56 | Age (years) | 35 - 44 | AGEYR | AGEYR3544 |
93247 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q037 | VALUE | 56 | Income | $35,000 - $49,999 | INC | INC3550 |
93248 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | NaN | Value | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q036 | VALUE | 56 | Education | Less than high school | EDU | EDUHS |
93249 rows × 33 columns
address | city | country | keys | latitude | longitude | name | postalCode | province | websites | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 324 Main St | Massena | US | us/ny/massena/324mainst/-1161002137 | 44.921300 | -74.890210 | McDonald's | 13662 | NY | http://mcdonalds.com,http://www.mcdonalds.com/... |
1 | 530 Clinton Ave | Washington Court House | US | us/oh/washingtoncourthouse/530clintonave/-7914... | 39.532550 | -83.445260 | Wendy's | 43160 | OH | http://www.wendys.com |
2 | 408 Market Square Dr | Maysville | US | us/ky/maysville/408marketsquaredr/1051460804 | 38.627360 | -83.791410 | Frisch's Big Boy | 41056 | KY | http://www.frischs.com,https://www.frischs.com... |
3 | 6098 State Highway 37 | Massena | US | us/ny/massena/6098statehighway37/-1161002137 | 44.950080 | -74.845530 | McDonald's | 13662 | NY | http://mcdonalds.com,http://www.mcdonalds.com/... |
4 | 139 Columbus Rd | Athens | US | us/oh/athens/139columbusrd/990890980 | 39.351550 | -82.097280 | OMG! Rotisserie | 45701 | OH | http://www.omgrotisserie.com,http://omgrotisse... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 3013 Peach Orchard Rd | Augusta | US | us/ga/augusta/3013peachorchardrd/-791445730 | 33.415257 | -82.024531 | Wendy's | 30906 | GA | http://www.wendys.com,http://wendys.com |
9996 | 678 Northwest Hwy | Cary | US | us/il/cary/678northwesthwy/787691191 | 42.217300 | -88.255800 | Lee's Oriental Martial Arts | 60013 | IL | http://www.mcdonalds.com |
9997 | 1708 Main St | Longmont | US | us/co/longmont/1708mainst/-448666054 | 40.189190 | -105.101720 | Five Guys | 80501 | CO | http://fiveguys.com |
9998 | 67740 Highway 111 | Cathedral City | US | us/ca/cathedralcity/67740highway111/-981164808 | 33.788640 | -116.482150 | El Pollo Loco | 92234 | CA | http://www.elpolloloco.com,http://elpolloloco.com |
9999 | 5701 E La Palma Ave | Anaheim | US | us/ca/anaheim/5701elapalmaave/554191587 | 33.860074 | -117.789762 | Carl's Jr. | 92807 | CA | http://www.carlsjr.com |
10000 rows × 10 columns
id | dateAdded | dateUpdated | address | categories | city | country | keys | latitude | longitude | name | postalCode | province | sourceURLs | websites | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AVwcmSyZIN2L1WUfmxyw | 2015-10-19T23:47:58Z | 2018-06-26T03:00:14Z | 800 N Canal Blvd | American Restaurant and Fast Food Restaurant | Thibodaux | US | us/la/thibodaux/800ncanalblvd/1780593795 | 29.814697 | -90.814742 | SONIC Drive In | 70301 | LA | https://foursquare.com/v/sonic-drive-in/4b7361... | https://locations.sonicdrivein.com/la/thibodau... |
1 | AVwcmSyZIN2L1WUfmxyw | 2015-10-19T23:47:58Z | 2018-06-26T03:00:14Z | 800 N Canal Blvd | Fast Food Restaurants | Thibodaux | US | us/la/thibodaux/800ncanalblvd/1780593795 | 29.814697 | -90.814742 | SONIC Drive In | 70301 | LA | https://foursquare.com/v/sonic-drive-in/4b7361... | https://locations.sonicdrivein.com/la/thibodau... |
2 | AVwcopQoByjofQCxgfVa | 2016-03-29T05:06:36Z | 2018-06-26T02:59:52Z | 206 Wears Valley Rd | Fast Food Restaurant | Pigeon Forge | US | us/tn/pigeonforge/206wearsvalleyrd/-864103396 | 35.803788 | -83.580553 | Taco Bell | 37863 | TN | https://www.yellowpages.com/pigeon-forge-tn/mi... | http://www.tacobell.com,https://locations.taco... |
3 | AVweXN5RByjofQCxxilK | 2017-01-03T07:46:11Z | 2018-06-26T02:59:51Z | 3652 Parkway | Fast Food | Pigeon Forge | US | us/tn/pigeonforge/3652parkway/93075755 | 35.782339 | -83.551408 | Arby's | 37863 | TN | http://www.yellowbook.com/profile/arbys_163389... | http://www.arbys.com,https://locations.arbys.c... |
4 | AWQ6MUvo3-Khe5l_j3SG | 2018-06-26T02:59:43Z | 2018-06-26T02:59:43Z | 2118 Mt Zion Parkway | Fast Food Restaurant | Morrow | US | us/ga/morrow/2118mtzionparkway/1305117222 | 33.562738 | -84.321143 | Steak 'n Shake | 30260 | GA | https://foursquare.com/v/steak-n-shake/4bcf77a... | http://www.steaknshake.com/locations/23851-ste... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | AV12gJwna4HuVbed9Ayg | 2017-07-24T21:28:46Z | 2018-04-07T13:19:06Z | 3460 Robinhood Rd | Fast Food Restaurants | Winston-Salem | US | us/nc/winston-salem/3460robinhoodrd/-66712705 | 36.117563 | -80.316553 | Pizza Hut | 27106 | NC | https://www.allmenus.com/nc/winston-salem/7341... | http://www.pizzahut.com |
9996 | AV12gJxKIxWefVJwhpzS | 2017-07-24T21:28:46Z | 2018-04-07T13:19:05Z | 3069 Kernersville Rd | Fast Food Restaurants | Winston-Salem | US | us/nc/winston-salem/3069kernersvillerd/-66712705 | 36.077718 | -80.176748 | Pizza Hut | 27107 | NC | https://www.allmenus.com/nc/winston-salem/7340... | http://www.pizzahut.com |
9997 | AVwdJMdSByjofQCxl8Vr | 2015-10-24T00:17:32Z | 2018-04-07T13:19:05Z | 838 S Main St | Fast Food Restaurants | Kernersville | US | us/nc/kernersville/838smainst/-66712705 | 36.111015 | -80.089165 | Pizza Hut | 27284 | NC | https://www.allmenus.com/nc/kernersville/73400... | http://www.pizzahut.com |
9998 | AVwdl2cykufWRAb57ZPs | 2016-04-05T02:59:45Z | 2018-04-07T13:19:05Z | 1702 Glendale Dr SW | Fast Food Restaurants | Wilson | US | us/nc/wilson/1702glendaledrsw/-66712705 | 35.719981 | -77.945795 | Pizza Hut | 27893 | NC | https://www.allmenus.com/nc/wilson/73403-pizza... | http://www.pizzahut.com |
9999 | AVwdecWKIN2L1WUfwMWU | 2016-11-08T02:26:32Z | 2018-04-07T13:19:05Z | 1405 W Broad St | Fast Food Restaurants | Elizabethtown | US | us/nc/elizabethtown/1405wbroadst/-66712705 | 34.632778 | -78.624615 | Pizza Hut | 28337 | NC | https://www.allmenus.com/nc/elizabethtown/7339... | http://www.pizzahut.com,http://api.citygridmed... |
10000 rows × 15 columns
Now we can start cleaning and exploring the data in our dataframes. First, let's clean and examine obesity_df. It seems that all of the values in the "Data_Value_Unit" column are NaN, so let's check the count of non-NaN values using the count method.
print("Data_Value_Unit # of Non NaN:", obesity_df['Data_Value_Unit'].count()) # Count the number of non-NaN values in the column
Data_Value_Unit # of Non NaN: 0
Seeing that this column is entirely NaN, we can drop it.
obesity_df = obesity_df.drop(columns=['Data_Value_Unit'])
The "Data_Value_Type" column also appears uninformative, because every entry seems to be the literal string "Value". We can confirm that all the values in this column are "Value" by using the unique method.
obesity_df['Data_Value_Type'].unique()
array(['Value'], dtype=object)
We see that the only unique value is "Value," so we can drop this column as well.
obesity_df = obesity_df.drop(columns=['Data_Value_Type'])
display(obesity_df)
YearStart | YearEnd | LocationAbbr | LocationDesc | Datasource | Class | Topic | Question | Data_Value | Data_Value_Alt | ... | GeoLocation | ClassID | TopicID | QuestionID | DataValueTypeID | LocationID | StratificationCategory1 | Stratification1 | StratificationCategoryId1 | StratificationID1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020 | 2020 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 30.6 | 30.6 | ... | NaN | PA | PA1 | Q047 | VALUE | 59 | Race/Ethnicity | Hispanic | RACE | RACEHIS |
1 | 2014 | 2014 | GU | Guam | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 29.3 | 29.3 | ... | (13.444304, 144.793731) | OWS | OWS1 | Q036 | VALUE | 66 | Education | High school graduate | EDU | EDUHSGRAD |
2 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 28.8 | 28.8 | ... | NaN | OWS | OWS1 | Q036 | VALUE | 59 | Income | $50,000 - $74,999 | INC | INC5075 |
3 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 32.7 | 32.7 | ... | NaN | OWS | OWS1 | Q037 | VALUE | 59 | Income | Data not reported | INC | INCNR |
4 | 2015 | 2015 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who achieve at least 300 min... | 26.6 | 26.6 | ... | NaN | PA | PA1 | Q045 | VALUE | 59 | Income | Less than $15,000 | INC | INCLESS15 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
93244 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 24.5 | 24.5 | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q037 | VALUE | 56 | Income | Less than $15,000 | INC | INCLESS15 |
93245 | 2022 | 2022 | WY | Wyoming | BRFSS | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 36.0 | 36.0 | ... | (43.23554134300048, -108.10983035299967) | PA | PA1 | Q047 | VALUE | 56 | Education | Less than high school | EDU | EDUHS |
93246 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.2 | 35.2 | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q036 | VALUE | 56 | Age (years) | 35 - 44 | AGEYR | AGEYR3544 |
93247 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.3 | 35.3 | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q037 | VALUE | 56 | Income | $35,000 - $49,999 | INC | INC3550 |
93248 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 41.0 | 41.0 | ... | (43.23554134300048, -108.10983035299967) | OWS | OWS1 | Q036 | VALUE | 56 | Education | Less than high school | EDU | EDUHS |
93249 rows × 31 columns
We can also get rid of some of the ID columns and the Data_Value_Alt column, since their information is captured by other columns. This makes the dataframe more readable. We can check whether Data_Value_Alt is needed by comparing it to the Data_Value column.
obesity_df['Data_Value_Alt'].equals(obesity_df['Data_Value'])
True
Since Data_Value_Alt is identical to Data_Value, we can drop it along with the ID columns.
obesity_df = obesity_df.drop(columns=['Data_Value_Alt','QuestionID', 'DataValueTypeID', 'ClassID', 'TopicID', 'LocationID', 'StratificationCategoryId1', 'StratificationID1'])
obesity_df
YearStart | YearEnd | LocationAbbr | LocationDesc | Datasource | Class | Topic | Question | Data_Value | Data_Value_Footnote_Symbol | ... | Sample_Size | Total | Age(years) | Education | Gender | Income | Race/Ethnicity | GeoLocation | StratificationCategory1 | Stratification1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020 | 2020 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 30.6 | NaN | ... | 31255.0 | NaN | NaN | NaN | NaN | NaN | Hispanic | NaN | Race/Ethnicity | Hispanic |
1 | 2014 | 2014 | GU | Guam | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 29.3 | NaN | ... | 842.0 | NaN | NaN | High school graduate | NaN | NaN | NaN | (13.444304, 144.793731) | Education | High school graduate |
2 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 28.8 | NaN | ... | 62562.0 | NaN | NaN | NaN | NaN | $50,000 - $74,999 | NaN | NaN | Income | $50,000 - $74,999 |
3 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 32.7 | NaN | ... | 60069.0 | NaN | NaN | NaN | NaN | Data not reported | NaN | NaN | Income | Data not reported |
4 | 2015 | 2015 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who achieve at least 300 min... | 26.6 | NaN | ... | 30904.0 | NaN | NaN | NaN | NaN | Less than $15,000 | NaN | NaN | Income | Less than $15,000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
93244 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 24.5 | NaN | ... | 111.0 | NaN | NaN | NaN | NaN | Less than $15,000 | NaN | (43.23554134300048, -108.10983035299967) | Income | Less than $15,000 |
93245 | 2022 | 2022 | WY | Wyoming | BRFSS | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 36.0 | NaN | ... | 159.0 | NaN | NaN | Less than high school | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Education | Less than high school |
93246 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.2 | NaN | ... | 450.0 | NaN | 35 - 44 | NaN | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Age (years) | 35 - 44 |
93247 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.3 | NaN | ... | 512.0 | NaN | NaN | NaN | NaN | $35,000 - $49,999 | NaN | (43.23554134300048, -108.10983035299967) | Income | $35,000 - $49,999 |
93248 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 41.0 | NaN | ... | 146.0 | NaN | NaN | Less than high school | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Education | Less than high school |
93249 rows × 23 columns
We can also inspect the data value footnote columns in case they are of use.
obesity_df['Data_Value_Footnote_Symbol'].unique()
array([nan, '~'], dtype=object)
obesity_df['Data_Value_Footnote'].unique()
array([nan, 'Data not available because sample size is insufficient.'], dtype=object)
These columns simply flag the missing data, which we can already identify with the isna and dropna methods, so we can drop them.
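For instance, a quick sketch of how to count the missing values in each remaining column:
print(obesity_df.isna().sum())  # number of NaN entries per column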
obesity_df = obesity_df.drop(columns=['Data_Value_Footnote_Symbol', 'Data_Value_Footnote'])
obesity_df
YearStart | YearEnd | LocationAbbr | LocationDesc | Datasource | Class | Topic | Question | Data_Value | Low_Confidence_Limit | ... | Sample_Size | Total | Age(years) | Education | Gender | Income | Race/Ethnicity | GeoLocation | StratificationCategory1 | Stratification1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020 | 2020 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 30.6 | 29.4 | ... | 31255.0 | NaN | NaN | NaN | NaN | NaN | Hispanic | NaN | Race/Ethnicity | Hispanic |
1 | 2014 | 2014 | GU | Guam | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 29.3 | 25.7 | ... | 842.0 | NaN | NaN | High school graduate | NaN | NaN | NaN | (13.444304, 144.793731) | Education | High school graduate |
2 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 28.8 | 28.1 | ... | 62562.0 | NaN | NaN | NaN | NaN | $50,000 - $74,999 | NaN | NaN | Income | $50,000 - $74,999 |
3 | 2013 | 2013 | US | National | Behavioral Risk Factor Surveillance System | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 32.7 | 31.9 | ... | 60069.0 | NaN | NaN | NaN | NaN | Data not reported | NaN | NaN | Income | Data not reported |
4 | 2015 | 2015 | US | National | Behavioral Risk Factor Surveillance System | Physical Activity | Physical Activity - Behavior | Percent of adults who achieve at least 300 min... | 26.6 | 25.6 | ... | 30904.0 | NaN | NaN | NaN | NaN | Less than $15,000 | NaN | NaN | Income | Less than $15,000 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
93244 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 24.5 | 16.3 | ... | 111.0 | NaN | NaN | NaN | NaN | Less than $15,000 | NaN | (43.23554134300048, -108.10983035299967) | Income | Less than $15,000 |
93245 | 2022 | 2022 | WY | Wyoming | BRFSS | Physical Activity | Physical Activity - Behavior | Percent of adults who engage in no leisure-tim... | 36.0 | 27.9 | ... | 159.0 | NaN | NaN | Less than high school | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Education | Less than high school |
93246 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.2 | 30.6 | ... | 450.0 | NaN | 35 - 44 | NaN | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Age (years) | 35 - 44 |
93247 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 35.3 | 30.2 | ... | 512.0 | NaN | NaN | NaN | NaN | $35,000 - $49,999 | NaN | (43.23554134300048, -108.10983035299967) | Income | $35,000 - $49,999 |
93248 | 2022 | 2022 | WY | Wyoming | BRFSS | Obesity / Weight Status | Obesity / Weight Status | Percent of adults aged 18 years and older who ... | 41.0 | 31.9 | ... | 146.0 | NaN | NaN | Less than high school | NaN | NaN | NaN | (43.23554134300048, -108.10983035299967) | Education | Less than high school |
93249 rows × 21 columns
In the following code, we drop the columns in the dataset that we will not use for our analyses.
obesity_df = obesity_df.drop(columns=['YearStart', 'Datasource', 'Topic', 'Low_Confidence_Limit','Sample_Size', 'Total', 'StratificationCategory1'])
To continue, let's take a look at the columns and their quantities of missing data using the info method.
obesity_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93249 entries, 0 to 93248
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   YearEnd                93249 non-null  int64
 1   LocationAbbr           93249 non-null  object
 2   LocationDesc           93249 non-null  object
 3   Class                  93249 non-null  object
 4   Question               93249 non-null  object
 5   Data_Value             84014 non-null  float64
 6   High_Confidence_Limit  84014 non-null  float64
 7   Age(years)             19980 non-null  object
 8   Education              13320 non-null  object
 9   Gender                 6660 non-null   object
 10  Income                 23310 non-null  object
 11  Race/Ethnicity         26640 non-null  object
 12  GeoLocation            91513 non-null  object
 13  Stratification1        93240 non-null  object
dtypes: float64(2), int64(1), object(11)
memory usage: 10.0+ MB
As we can tell, a few columns have very few non-null entries. These include Gender, Income, Race/Ethnicity, Education, and Age(years). Unfortunately, these columns may be important for prediction and analysis, as they could play a significant role in the obesity or inactivity rates at a certain location. We will handle these missing values later on, but for now let's do a basic exploration and analysis of the data. Specifically, let's take a look at the summary of the data, the variance, and the covariance between the features of the dataset and the level of obesity.
Data Exploration and Summary Statistics
First, let's look at the summary of the numerical columns that we've kept.
obesity_df.describe()
YearEnd | Data_Value | High_Confidence_Limit | |
---|---|---|---|
count | 93249.000000 | 84014.000000 | 84014.000000 |
mean | 2016.308068 | 31.226492 | 36.134303 |
std | 3.308679 | 10.021059 | 10.978276 |
min | 2011.000000 | 0.900000 | 3.000000 |
25% | 2013.000000 | 24.400000 | 28.700000 |
50% | 2017.000000 | 31.200000 | 36.000000 |
75% | 2019.000000 | 37.000000 | 42.200000 |
max | 2022.000000 | 77.600000 | 87.700000 |
We can see that the total number of rows is 93,249 and can observe characteristics of the values such as their quartiles and mean. We also notice that roughly 9,200 entries (93,249 - 84,014) are missing from the Data_Value column.
The most important variable to understand is the Data_Value column, which gives the percentage of a sample described by the associated question in the row. For example, the question/topic could be the percentage of adults who engage in no physical activity, and the data value is that percentage. Let's look at the general distribution of this value.
obesity_df['Data_Value'].plot(kind='hist', bins = 30)
plt.show()
As we can see, the distribution roughly follows a bell-curve shape, with a maximum of 77.6% and a minimum of 0.9%. The highest frequency of values lies in the 30-35% range.
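Since we mentioned variance and covariance earlier, here is a quick sketch of how the remaining numeric columns vary together (the categorical features are handled later, in the ML section):
# Covariance and correlation between the numeric columns that remain
print(obesity_df[['YearEnd', 'Data_Value', 'High_Confidence_Limit']].cov())
print(obesity_df[['YearEnd', 'Data_Value', 'High_Confidence_Limit']].corr())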
Next, let's take a look at the other dataframes for cleaning and examination, starting with ffr_df.
display(ffr_df)
address | city | country | keys | latitude | longitude | name | postalCode | province | websites | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 324 Main St | Massena | US | us/ny/massena/324mainst/-1161002137 | 44.921300 | -74.890210 | McDonald's | 13662 | NY | http://mcdonalds.com,http://www.mcdonalds.com/... |
1 | 530 Clinton Ave | Washington Court House | US | us/oh/washingtoncourthouse/530clintonave/-7914... | 39.532550 | -83.445260 | Wendy's | 43160 | OH | http://www.wendys.com |
2 | 408 Market Square Dr | Maysville | US | us/ky/maysville/408marketsquaredr/1051460804 | 38.627360 | -83.791410 | Frisch's Big Boy | 41056 | KY | http://www.frischs.com,https://www.frischs.com... |
3 | 6098 State Highway 37 | Massena | US | us/ny/massena/6098statehighway37/-1161002137 | 44.950080 | -74.845530 | McDonald's | 13662 | NY | http://mcdonalds.com,http://www.mcdonalds.com/... |
4 | 139 Columbus Rd | Athens | US | us/oh/athens/139columbusrd/990890980 | 39.351550 | -82.097280 | OMG! Rotisserie | 45701 | OH | http://www.omgrotisserie.com,http://omgrotisse... |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | 3013 Peach Orchard Rd | Augusta | US | us/ga/augusta/3013peachorchardrd/-791445730 | 33.415257 | -82.024531 | Wendy's | 30906 | GA | http://www.wendys.com,http://wendys.com |
9996 | 678 Northwest Hwy | Cary | US | us/il/cary/678northwesthwy/787691191 | 42.217300 | -88.255800 | Lee's Oriental Martial Arts | 60013 | IL | http://www.mcdonalds.com |
9997 | 1708 Main St | Longmont | US | us/co/longmont/1708mainst/-448666054 | 40.189190 | -105.101720 | Five Guys | 80501 | CO | http://fiveguys.com |
9998 | 67740 Highway 111 | Cathedral City | US | us/ca/cathedralcity/67740highway111/-981164808 | 33.788640 | -116.482150 | El Pollo Loco | 92234 | CA | http://www.elpolloloco.com,http://elpolloloco.com |
9999 | 5701 E La Palma Ave | Anaheim | US | us/ca/anaheim/5701elapalmaave/554191587 | 33.860074 | -117.789762 | Carl's Jr. | 92807 | CA | http://www.carlsjr.com |
10000 rows × 10 columns
We don't need the keys or websites columns. Also, the country column is irrelevant if every row describes a location in the US, so let's verify that before dropping it.
# Check whether all the country values are 'US'
ffr_df['country'].unique()
array(['US'], dtype=object)
Because 'US' is the only value, the country column carries no information: the data only describes locations within the US. We can drop it along with the keys and websites columns.
ffr_df = ffr_df.drop(columns=['keys', 'websites', 'country'])
display(ffr_df)
address | city | latitude | longitude | name | postalCode | province | |
---|---|---|---|---|---|---|---|
0 | 324 Main St | Massena | 44.921300 | -74.890210 | McDonald's | 13662 | NY |
1 | 530 Clinton Ave | Washington Court House | 39.532550 | -83.445260 | Wendy's | 43160 | OH |
2 | 408 Market Square Dr | Maysville | 38.627360 | -83.791410 | Frisch's Big Boy | 41056 | KY |
3 | 6098 State Highway 37 | Massena | 44.950080 | -74.845530 | McDonald's | 13662 | NY |
4 | 139 Columbus Rd | Athens | 39.351550 | -82.097280 | OMG! Rotisserie | 45701 | OH |
... | ... | ... | ... | ... | ... | ... | ... |
9995 | 3013 Peach Orchard Rd | Augusta | 33.415257 | -82.024531 | Wendy's | 30906 | GA |
9996 | 678 Northwest Hwy | Cary | 42.217300 | -88.255800 | Lee's Oriental Martial Arts | 60013 | IL |
9997 | 1708 Main St | Longmont | 40.189190 | -105.101720 | Five Guys | 80501 | CO |
9998 | 67740 Highway 111 | Cathedral City | 33.788640 | -116.482150 | El Pollo Loco | 92234 | CA |
9999 | 5701 E La Palma Ave | Anaheim | 33.860074 | -117.789762 | Carl's Jr. | 92807 | CA |
10000 rows × 7 columns
Now we can start hypothesis testing to find covariation and relationships between the features.
First, let's try to find the relationship between education level and obesity. Let's say that H0: the distribution of obesity among different levels of education is the same (education has no impact on obesity), and Ha: the distributions of obesity among different levels of education are different (education does have an impact on obesity).
Since we are comparing two categorical variables, we can use the Chi-Squared test. This test estimates the probability that two sets of categorical data come from the same distribution. Put simply, if the distribution of obesity level is the same across all education values, then we can say that education has no influence on obesity rate.
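For reference, the chi-squared statistic compares the observed counts $O_{ij}$ in the contingency table to the counts $E_{ij}$ expected if education and obesity level were independent:

$$\chi^2 = \sum_{i,j} \frac{(O_{ij} - E_{ij})^2}{E_{ij}}, \qquad E_{ij} = \frac{(\text{row } i \text{ total}) \times (\text{column } j \text{ total})}{N}$$

A large statistic (and therefore a small p-value) means the observed table is unlikely under independence.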
Below is the contingency table and a plot of the relationship:
import scipy.stats as st
# Bucket Data_Value into Low/Medium/High using the 25th (24.4) and 75th (37.0)
# percentiles seen in the describe() output above
def obesity_level(val):
    if val > 37:
        return 'High'
    elif 24.4 <= val <= 37:
        return 'Medium'
    else:
        return 'Low'
obesity_df['level_of_obesity'] = obesity_df['Data_Value'].apply(obesity_level)
new_obesity = obesity_df.dropna(subset = ['Education', 'Data_Value'])
contingency_table = pd.crosstab(new_obesity['Education'], new_obesity['level_of_obesity'])
print(contingency_table)
contingency_table.plot(kind = 'bar')
plt.title('Relationship between education and obesity')
plt.ylabel('Obesity count')
plt.show()
level_of_obesity                  High   Low  Medium
Education
College graduate                   867  1173    1273
High school graduate               637   527    2149
Less than high school             1118   621    1574
Some college or technical school   613   855    1845
Now we can calculate the p-value using a Chi-Squared test and compare it to our significance level of 0.05. We can use this information to decide whether we need to reject the null hypothesis.
res = st.chi2_contingency(contingency_table)
f"{res.pvalue:.162f}"
'0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005'
Our p-value is about 5.1 x 10^(-162), far below the significance level of 0.05, so we reject the null hypothesis. Therefore, education does impact the level of obesity, because the different levels of education have different distributions of obesity.
In this analysis, we examine the relationship between the number of fast food restaurants in each U.S. state and the obesity rate for those states. First, we filter the obesity dataset to include only data for the U.S. states. We then narrow it down further so that only the most recent obesity data for each state is included. Next, we calculate the number of fast food restaurants in each state by counting occurrences in the fast food restaurant dataset. We then merge the obesity data and the fast food restaurant counts on the state column to combine the relevant information into a single dataset. Using this merged dataset, we perform a linear regression hypothesis test to observe the relationship between the number of fast food restaurants and obesity rates. The scatter plot shows the data points, while the regression line, shown in red, visualizes the trend in the data. The goal is to determine whether there is a noticeable pattern, such as a positive correlation where an increase in the number of fast food restaurants corresponds to a higher obesity rate in the state.
import statsmodels.api as sm
import seaborn as sns
state_names = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut',
'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan',
'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma',
'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']
filtered_obesity_df = obesity_df[obesity_df['LocationDesc'].isin(state_names)]
most_recent_df = filtered_obesity_df.loc[filtered_obesity_df.groupby('LocationDesc')['YearEnd'].idxmax()]
ffr_df['province'] = ffr_df['province'].replace('Co Spgs', 'CO')  # fix a mislabeled province value and assign the result back
province_counts = ffr_df['province'].value_counts().reset_index()
province_counts.columns = ['LocationAbbr', 'count']
merged_df = pd.merge(province_counts, most_recent_df, on='LocationAbbr', how='inner')
X = merged_df['count']
y = merged_df['Data_Value']
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()
plt.figure(figsize=(10, 6))
sns.scatterplot(data=merged_df, x='count', y='Data_Value', color='blue', label='Data Points')
sns.regplot(data=merged_df, x='count', y='Data_Value', scatter=False, color='red', label='Regression Line')
plt.title('Relationship Between Fast Food Restaurant Count and Obesity Rates')
plt.xlabel('Number of Fast Food Restaurants')
plt.ylabel('Obesity Rate (%)')
plt.legend()
plt.grid(True)
plt.show()
The result of the visualization above was much weaker than we expected. We were expecting a strong positive correlation between obesity percentage and the number of fast food restaurants per state. The relationship was still positive, but the correlation was weak, with a value of only 0.06.
Let's try to find the relationship between income and obesity. Let's say that H0: The mean obesity rate is the same across every income group, and Ha: The mean obesity rate is not the same across every income group (that means at least one group would be different).
Since we are comparing multiple income groups and looking at differences among means, it is best to use ANOVA testing.
ANOVA testing allows us to compare the means of multiple groups (income values), to determine if there are significant differences between them. Using this, we can hope to see whether one or more income values cause a significantly different outcome in obesity rate - signifying that yes, income value does influence obesity rate.
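Concretely, the one-way ANOVA F statistic compares the variation between the group means to the variation within the groups:

$$F = \frac{\sum_{k} n_k(\bar{x}_k - \bar{x})^2 / (K - 1)}{\sum_{k}\sum_{i}(x_{ki} - \bar{x}_k)^2 / (N - K)}$$

where $K$ is the number of income groups and $N$ is the total number of observations; a large $F$ (and hence a small p-value) indicates that at least one group mean differs.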
income_df = obesity_df[['Income', 'Data_Value']].dropna()
g1 = income_df[income_df['Income'] == 'Less than $15,000']['Data_Value']
g2 = income_df[income_df['Income'] == '$15,000 - $24,999']['Data_Value']
g3 = income_df[income_df['Income'] == '$25,000 - $34,999']['Data_Value']
g4 = income_df[income_df['Income'] == '$35,000 - $49,999']['Data_Value']
g5 = income_df[income_df['Income'] == '$50,000 - $74,999']['Data_Value']
g6 = income_df[income_df['Income'] == '$75,000 or greater']['Data_Value']
g7 = income_df[income_df['Income'] == 'Data not reported']['Data_Value']
res = st.f_oneway(g1, g2, g3, g4, g5, g6, g7)
res.pvalue
1.027379650584492e-30
Since our p-value is 1.03 x 10^(-30), well below the significance level of 0.05, we reject the null hypothesis. Therefore, we can state that at least one income group's mean obesity rate is different from the others.
We can display this down below:
import pandas as pd
import matplotlib.pyplot as plt
# Group the data based on Income categories and calculate their means
income_groups = [
"Less than $15,000",
"$15,000 - $24,999",
"$25,000 - $34,999",
"$35,000 - $49,999",
"$50,000 - $74,999",
"$75,000 or greater",
"Data not reported"
]
mean_values = [
g1.mean(),
g2.mean(),
g3.mean(),
g4.mean(),
g5.mean(),
g6.mean(),
g7.mean()
]
# Plot the means
plt.bar(income_groups, mean_values, color='skyblue')
plt.xlabel('Income Group')
plt.ylabel('Mean Data Value')
plt.title('Mean Obesity Data Value by Income Group')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
At first glance, the means look extremely similar despite our results from ANOVA. This implies that, although the means are numerically close, the differences between them are still statistically significant. We can then use a post-hoc test to identify which income group is significantly different. While ANOVA tells us whether any group influences obesity rate, post-hoc tests allow us to pinpoint exactly which groups do so.
from statsmodels.stats.multicomp import pairwise_tukeyhsd
tukey = pairwise_tukeyhsd(
endog = income_df['Data_Value'], # Dependent variable
groups = income_df['Income'], # Categorical group labels
alpha = 0.05 # Significance level
)
# Print the results
print(tukey)
# Optionally, plot the results
tukey.plot_simultaneous()
plt.show()
          Multiple Comparison of Means - Tukey HSD, FWER=0.05
============================================================================
      group1             group2        meandiff  p-adj   lower   upper  reject
----------------------------------------------------------------------------
$15,000 - $24,999   $25,000 - $34,999   -0.3854  0.6571  -1.0789  0.3081  False
$15,000 - $24,999   $35,000 - $49,999   -0.5283  0.2709  -1.2218  0.1652  False
$15,000 - $24,999   $50,000 - $74,999   -0.8289  0.0078  -1.5227 -0.1351   True
$15,000 - $24,999   $75,000 or greater  -1.3017  0.0     -1.9952 -0.6082   True
$15,000 - $24,999   Data not reported   -2.3067  0.0     -3.0002 -1.6132   True
$15,000 - $24,999   Less than $15,000    0.1441  0.9964  -0.5494  0.8377  False
$25,000 - $34,999   $35,000 - $49,999   -0.1429  0.9966  -0.8364  0.5506  False
$25,000 - $34,999   $50,000 - $74,999   -0.4435  0.4904  -1.1373  0.2503  False
$25,000 - $34,999   $75,000 or greater  -0.9163  0.0019  -1.6098 -0.2227   True
$25,000 - $34,999   Data not reported   -1.9213  0.0     -2.6148 -1.2278   True
$25,000 - $34,999   Less than $15,000    0.5296  0.2681  -0.164   1.2231  False
$35,000 - $49,999   $50,000 - $74,999   -0.3006  0.8624  -0.9944  0.3932  False
$35,000 - $49,999   $75,000 or greater  -0.7734  0.0175  -1.4669 -0.0799   True
$35,000 - $49,999   Data not reported   -1.7784  0.0     -2.4719 -1.0849   True
$35,000 - $49,999   Less than $15,000    0.6724  0.0644  -0.0211  1.366   False
$50,000 - $74,999   $75,000 or greater  -0.4728  0.4086  -1.1666  0.221   False
$50,000 - $74,999   Data not reported   -1.4778  0.0     -2.1716 -0.784    True
$50,000 - $74,999   Less than $15,000    0.973   0.0007   0.2793  1.6668   True
$75,000 or greater  Data not reported   -1.005   0.0004  -1.6985 -0.3115   True
$75,000 or greater  Less than $15,000    1.4458  0.0      0.7523  2.1393   True
Data not reported   Less than $15,000    2.4508  0.0      1.7573  3.1444   True
----------------------------------------------------------------------------
Based on this, we can clearly tell that groups with less than $15,000 income tend to have higher data values, implying higher obesity rates, and their values are significantly different across the board.
In comparison, the more moderate income ranges do not differ significantly from each other, implying that lower income has the strongest influence on the percentage of obesity.
ML Design/Development
With these analyses, we now have an idea of how the different features of the data sets correlate with obesity and inactivity. We can now transition to using ML for prediction and pattern detection. However, we need to transform our features, as our dataframe is currently unusable for ML applications. To start, we need to turn our data into numerical values, since every column is categorical. We can do this by assigning a number to each category in each column. This is essentially label (ordinal) encoding, a variation on one-hot encoding that does not require adding a new column for every category. The columns that we will examine include the following: Education, Income, Age, Race, Data_Value, and location.
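As a side note, pandas' map method is an equivalent, non-destructive way to apply such dictionaries; a small sketch using the same Education mapping defined below:
# Sketch only: map sends values not found in the dictionary (including NaN) to NaN,
# avoiding the downcasting FutureWarning that replace can emit in newer pandas versions
education_codes = obesity_df['Education'].map({'Less than high school': 1, 'High school graduate': 2,
                                               'Some college or technical school': 3, 'College graduate': 4})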
# Education is first:
display(obesity_df['Education'].unique())
array([nan, 'High school graduate', 'Less than high school', 'Some college or technical school', 'College graduate'], dtype=object)
# assign each level of education a number from 1 to 4
education_dict = {'Less than high school': 1, 'High school graduate': 2, 'Some college or technical school': 3, 'College graduate':4}
obesity_df['Education'] = obesity_df['Education'].replace(education_dict)
display(obesity_df['Education'].unique())
array([nan, 2., 1., 3., 4.])
# Then income
display(obesity_df['Income'].unique())
array([nan, '$50,000 - $74,999', 'Data not reported', 'Less than $15,000', '$25,000 - $34,999', '$15,000 - $24,999', '$35,000 - $49,999', '$75,000 or greater'], dtype=object)
# assign each level of income a number from 1 to 6 leaving 'Data not reported' to be converted to nan
income_dict = {'Less than $15,000':1, '$15,000 - $24,999':2, '$25,000 - $34,999':3, '$35,000 - $49,999':4, '$50,000 - $74,999':5, '$75,000 or greater':6}
obesity_df['Income'] = obesity_df['Income'].replace(income_dict)
display(obesity_df['Income'].unique())
obesity_df['Income'] = obesity_df['Income'].replace({'Data not reported': float('NaN')})
array([nan, 5, 'Data not reported', 1, 3, 2, 4, 6], dtype=object)
# Age
display(obesity_df['Age(years)'].unique())
array([nan, '25 - 34', '55 - 64', '18 - 24', '45 - 54', '35 - 44', '65 or older'], dtype=object)
# assign each level of age a number from 1 to 6
age_dict = {'18 - 24':1, '25 - 34':2, '35 - 44':3, '45 - 54':4, '55 - 64':5, '65 or older':6}
obesity_df['Age(years)'] = obesity_df['Age(years)'].replace(age_dict)
display(obesity_df['Age(years)'].unique())
array([nan, 2., 5., 1., 4., 3., 6.])
# Race/ethnicity
display(obesity_df['Race/Ethnicity'].unique())
array(['Hispanic', nan, 'American Indian/Alaska Native', 'Asian', 'Non-Hispanic White', 'Other', '2 or more races', 'Hawaiian/Pacific Islander', 'Non-Hispanic Black'], dtype=object)
# assign each specified race a number from 1 to 8
race_dict = {'Non-Hispanic White':1, 'Non-Hispanic Black':2, 'Hispanic':3, 'Asian':4, 'American Indian/Alaska Native':5, 'Hawaiian/Pacific Islander':6, '2 or more races':7, 'Other':8}
obesity_df['Race/Ethnicity'] = obesity_df['Race/Ethnicity'].replace(race_dict)
display(obesity_df['Race/Ethnicity'].unique())
array([ 3., nan, 5., 4., 1., 8., 7., 6., 2.])
# Class: Target Value for first Model
display(obesity_df['Class'].unique())
array(['Physical Activity', 'Obesity / Weight Status', 'Fruits and Vegetables'], dtype=object)
# assign each class a number from 1 to 3
class_dict = {'Obesity / Weight Status':1, 'Physical Activity':2, 'Fruits and Vegetables':3}
obesity_df['Class'] = obesity_df['Class'].replace(class_dict)
display(obesity_df['Class'].unique())
array([2, 1, 3])
With these columns now numerical, we need a way to fill in the missing values. Unfortunately, not a single row in the dataset is free of missing values. However, we can see that the data has been sampled from multiple locations. We can use this to our advantage: within each location, we apply KNN imputation to fill in missing values using similar data points. This is essentially guessing which category belongs in each empty cell of a row, which may bias the data, but it is essential for running any ML algorithm.
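To make the idea concrete, here is a tiny sketch of KNN imputation on made-up numbers (the values are purely illustrative): each NaN is replaced using the k rows that look most similar on the features that are present.
import numpy as np
from sklearn.impute import KNNImputer
toy = [[1.0, 2.0],
       [2.0, np.nan],  # this missing value gets filled in from the 2 nearest rows
       [3.0, 6.0],
       [2.1, 4.1]]
print(KNNImputer(n_neighbors=2).fit_transform(toy))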
To start, let's take a look at the quantity of entries in each location.
obesity_df['LocationDesc'].value_counts()
count | |
---|---|
LocationDesc | |
National | 1736 |
West Virginia | 1736 |
Oklahoma | 1736 |
Mississippi | 1736 |
Oregon | 1736 |
Wisconsin | 1736 |
Kansas | 1736 |
Florida | 1736 |
Idaho | 1736 |
Arizona | 1736 |
Montana | 1736 |
Georgia | 1736 |
North Carolina | 1736 |
Pennsylvania | 1736 |
North Dakota | 1736 |
South Carolina | 1736 |
Nebraska | 1736 |
Tennessee | 1736 |
Missouri | 1736 |
Nevada | 1736 |
Iowa | 1736 |
Indiana | 1736 |
Ohio | 1736 |
Alaska | 1736 |
Vermont | 1736 |
Colorado | 1736 |
Kentucky | 1736 |
Utah | 1736 |
New York | 1736 |
Wyoming | 1736 |
District of Columbia | 1736 |
Alabama | 1736 |
Rhode Island | 1736 |
Delaware | 1736 |
Washington | 1736 |
Maine | 1736 |
Michigan | 1736 |
Virginia | 1736 |
California | 1736 |
Texas | 1736 |
Connecticut | 1736 |
Massachusetts | 1736 |
Arkansas | 1736 |
Illinois | 1736 |
New Hampshire | 1736 |
New Mexico | 1736 |
Maryland | 1736 |
Minnesota | 1736 |
Hawaii | 1736 |
Louisiana | 1736 |
South Dakota | 1736 |
New Jersey | 1493 |
Puerto Rico | 1316 |
Guam | 1260 |
Virgin Islands | 644 |
Let's group the data by these locations and show an example dataframe from one of those locations.
features = obesity_df[['Education', 'Income', 'LocationDesc', 'Age(years)', 'Race/Ethnicity', 'Data_Value', 'Class']]
grouped_features = features.groupby('LocationDesc')
grouped_features.count()
display(grouped_features.get_group('Alabama'))
Education | Income | LocationDesc | Age(years) | Race/Ethnicity | Data_Value | Class | |
---|---|---|---|---|---|---|---|
9 | NaN | NaN | Alabama | 2.0 | NaN | 35.2 | 1 |
48 | NaN | NaN | Alabama | 5.0 | NaN | 35.3 | 1 |
119 | NaN | NaN | Alabama | 3.0 | NaN | 31.9 | 1 |
236 | NaN | NaN | Alabama | NaN | NaN | 37.7 | 1 |
305 | NaN | NaN | Alabama | NaN | 6.0 | NaN | 1 |
... | ... | ... | ... | ... | ... | ... | ... |
88925 | NaN | NaN | Alabama | 2.0 | NaN | 21.2 | 2 |
88926 | NaN | NaN | Alabama | NaN | 7.0 | 24.9 | 2 |
88927 | NaN | 4.0 | Alabama | NaN | NaN | 34.7 | 1 |
88928 | NaN | NaN | Alabama | NaN | 8.0 | NaN | 2 |
88929 | NaN | NaN | Alabama | NaN | 6.0 | NaN | 1 |
1736 rows × 7 columns
Now, let's use Scikit to do KNN imputation.
from sklearn.impute import KNNImputer
k = 3
imputer = KNNImputer(n_neighbors=k)
new_table = pd.DataFrame()
# knn imputation for each location
for loc in grouped_features.groups.keys():
    # make a new sub table with no missing values
    new_subtable = imputer.fit_transform(grouped_features.get_group(loc).drop(columns = ['LocationDesc']))
    new_subtable = pd.DataFrame(new_subtable, columns = grouped_features.get_group(loc).drop(columns = 'LocationDesc').columns)
    # append sub table to the new table
    new_table = pd.concat([new_table, new_subtable])
display(new_table)
Education | Income | Age(years) | Race/Ethnicity | Data_Value | Class | |
---|---|---|---|---|---|---|
0 | 2.666667 | 3.666667 | 2.000000 | 7.333333 | 35.200000 | 1.0 |
1 | 2.333333 | 4.666667 | 5.000000 | 7.333333 | 35.300000 | 1.0 |
2 | 3.000000 | 4.666667 | 3.000000 | 7.333333 | 31.900000 | 1.0 |
3 | 3.333333 | 4.666667 | 4.000000 | 7.333333 | 37.700000 | 1.0 |
4 | 2.333333 | 4.333333 | 3.333333 | 6.000000 | 34.133333 | 1.0 |
... | ... | ... | ... | ... | ... | ... |
1731 | 4.000000 | 1.000000 | 3.000000 | 3.666667 | 24.500000 | 1.0 |
1732 | 1.000000 | 1.333333 | 6.000000 | 6.666667 | 36.000000 | 2.0 |
1733 | 3.000000 | 3.666667 | 3.000000 | 3.666667 | 35.200000 | 1.0 |
1734 | 3.000000 | 4.000000 | 3.666667 | 3.666667 | 35.300000 | 1.0 |
1735 | 1.000000 | 5.666667 | 4.333333 | 3.666667 | 41.000000 | 1.0 |
93249 rows × 6 columns
We should also round the imputed values to the nearest integer so they match the original categorical codes.
new_table['Education'] = new_table['Education'].apply(lambda x: round(x))
new_table['Income'] = new_table['Income'].apply(lambda x: round(x))
new_table['Age(years)'] = new_table['Age(years)'].apply(lambda x: round(x))
new_table['Race/Ethnicity'] = new_table['Race/Ethnicity'].apply(lambda x: round(x))
display(new_table)
Education | Income | Age(years) | Race/Ethnicity | Data_Value | Class | |
---|---|---|---|---|---|---|
0 | 3 | 4 | 2 | 7 | 35.200000 | 1.0 |
1 | 2 | 5 | 5 | 7 | 35.300000 | 1.0 |
2 | 3 | 5 | 3 | 7 | 31.900000 | 1.0 |
3 | 3 | 5 | 4 | 7 | 37.700000 | 1.0 |
4 | 2 | 4 | 3 | 6 | 34.133333 | 1.0 |
... | ... | ... | ... | ... | ... | ... |
1731 | 4 | 1 | 3 | 4 | 24.500000 | 1.0 |
1732 | 1 | 1 | 6 | 7 | 36.000000 | 2.0 |
1733 | 3 | 4 | 3 | 4 | 35.200000 | 1.0 |
1734 | 3 | 4 | 4 | 4 | 35.300000 | 1.0 |
1735 | 1 | 6 | 4 | 4 | 41.000000 | 1.0 |
93249 rows × 6 columns
Now that we have our data ready for processing, we can take a moment to consider an important observation. The Class value indicates whether a row describes obesity, physical activity, or fruit/vegetable consumption. We can see exactly what each data value measures via the Question column.
display(obesity_df['Class'].value_counts())
display(obesity_df['Question'].value_counts())
count | |
---|---|
Class | |
2 | 47885 |
1 | 36234 |
3 | 9130 |
| Question | count |
|---|---|
| Percent of adults aged 18 years and older who have obesity | 18117 |
| Percent of adults aged 18 years and older who have an overweight classification | 18117 |
| Percent of adults who engage in no leisure-time physical activity | 18089 |
| Percent of adults who achieve at least 300 minutes a week of moderate-intensity aerobic physical activity or 150 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination) | 7449 |
| Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic physical activity and engage in muscle-strengthening activities on 2 or more days a week | 7449 |
| Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination) | 7449 |
| Percent of adults who engage in muscle-strengthening activities on 2 or more days a week | 7449 |
| Percent of adults who report consuming fruit less than one time daily | 4565 |
| Percent of adults who report consuming vegetables less than one time daily | 4565 |
We can group these questions into the three categories of obesity, activity, and fruit/vegetable consumption.
Obesity (1):
- Negative: Percent of adults aged 18 years and older who have obesity
- Negative: Percent of adults aged 18 years and older who have an overweight classification

Activity (2):
- Negative: Percent of adults who engage in no leisure-time physical activity
- Positive: Percent of adults who achieve at least 300 minutes a week of moderate-intensity...
- Positive: Percent of adults who achieve at least 150 minutes a week of moderate-intensity...
- Positive: Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination)
- Positive: Percent of adults who engage in muscle-strengthening activities on 2 or more days a week

Fruit/Vegetable consumption (3):
- Negative: Percent of adults who report consuming fruit less than one time daily
- Negative: Percent of adults who report consuming vegetables less than one time daily
Overall Idea
A higher data value for an obesity question indicates a negative conclusion.
A higher data value for activity indicates a negative conclusion when the question concerns no leisure-time physical activity, whereas a higher value for the remaining activity questions is positive.
Finally, a higher data value for fruit/vegetable consumption indicates a negative conclusion.
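For reference, one way to make this grouping explicit in code is a small lookup table from question text to (class, polarity). The mapping below is purely illustrative; it is an assumption about how each question would be tagged and is not used by the models that follow.

```python
# Illustrative (hypothetical) mapping of each Question to its class and polarity.
# A "negative" polarity means a higher percentage is an unfavorable outcome.
question_polarity = {
    'Percent of adults aged 18 years and older who have obesity': (1, 'negative'),
    'Percent of adults aged 18 years and older who have an overweight classification': (1, 'negative'),
    'Percent of adults who engage in no leisure-time physical activity': (2, 'negative'),
    'Percent of adults who engage in muscle-strengthening activities on 2 or more days a week': (2, 'positive'),
    'Percent of adults who report consuming fruit less than one time daily': (3, 'negative'),
    'Percent of adults who report consuming vegetables less than one time daily': (3, 'negative'),
    # ...the remaining aerobic-activity questions would likewise be tagged (2, 'positive')
}
```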
These ideas are important for regressors. In the following code, we explore the accuracy of different models in predicting the percentage of obesity given a description of a population (Race, Age range, Education, and Income).
For each different category, we can make a linear regression model for predicting the percentage for that category.
ML Algorithm Training and Test Data Analysis
display(new_table)
| | Education | Income | Age(years) | Race/Ethnicity | Data_Value | Class |
|---|---|---|---|---|---|---|
| 0 | 3 | 4 | 2 | 7 | 35.200000 | 1.0 |
| 1 | 2 | 5 | 5 | 7 | 35.300000 | 1.0 |
| 2 | 3 | 5 | 3 | 7 | 31.900000 | 1.0 |
| 3 | 3 | 5 | 4 | 7 | 37.700000 | 1.0 |
| 4 | 2 | 4 | 3 | 6 | 34.133333 | 1.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 1731 | 4 | 1 | 3 | 4 | 24.500000 | 1.0 |
| 1732 | 1 | 1 | 6 | 7 | 36.000000 | 2.0 |
| 1733 | 3 | 4 | 3 | 4 | 35.200000 | 1.0 |
| 1734 | 3 | 4 | 4 | 4 | 35.300000 | 1.0 |
| 1735 | 1 | 6 | 4 | 4 | 41.000000 | 1.0 |

93249 rows × 6 columns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
obesity = new_table[new_table['Class'] == 1]
activity = new_table[new_table['Class'] == 2]
nutrition = new_table[new_table['Class'] == 3]
dfs = [("Obesity",obesity), ("Activity",activity), ("Nutrition",nutrition)]
# Fit and evaluate a baseline linear regression for each of the three categories
for group_name, df in dfs:
    target_features = df[['Education', 'Income', 'Age(years)', 'Race/Ethnicity']]
    model_target = df['Data_Value']
    X_train, X_test, y_train, y_test = train_test_split(target_features, model_target, test_size=0.2, random_state=42)

    # Standardize the features so they are on a comparable scale
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = LinearRegression()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Squared Error for {group_name}: {mse}")
    print(f"R-squared: {r2}")

    # Plot predicted vs. actual values for this category
    plt.scatter(y_test, y_pred, color='blue', alpha=0.5, label="Predicted values")
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2, label="Ideal Prediction")
    plt.title(f"{group_name} - Actual vs Predicted")
    plt.xlabel('Actual Data Value')
    plt.ylabel('Predicted Data Value')
    plt.grid(True)
    plt.legend()
    plt.show()
Mean Squared Error for Obesity: 28.361608311875717
R-squared: 0.2556615957364562
Mean Squared Error for Activity: 129.32780845095235
R-squared: 0.007346128474098879
Mean Squared Error for Nutrition: 102.35161305502322
R-squared: 0.1852917167638467
Depicted above is our linear regression model, which serves as our preliminary analysis of obesity rate, since the obesity rate for each population is a continuous value.
Linear regression models the relationship between a target and one or more features by finding the best-fitting line (or hyperplane) for that relationship. It also gives us a useful baseline: if the fit is poor, that tells us the data likely do not follow a linear pattern and we will need additional, more flexible models to capture it.
From the start, we prioritized obesity as the primary target variable, even though the dataset also contains other classes such as activity and nutrition, as shown in the visualizations above. While those classes were considered, focusing on obesity best aligns with the project's original objectives.
target_features = obesity[['Education', 'Income', 'Age(years)', 'Race/Ethnicity']]
model_target = obesity['Data_Value']
X_train, X_test, y_train, y_test = train_test_split(target_features, model_target, test_size = 0.2, random_state = 42)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
model = LinearRegression()
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"Mean Squared Error for Obesity: {mse}")
print(f"R-squared: {r2}")
plt.scatter(y_test, y_pred, color='blue', alpha=0.5, label = "Predicted Values")
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2, label = "Ideal Prediction")
plt.title(f"Obesity - Actual vs Predicted")
plt.xlabel('Actual Data Value')
plt.ylabel('Predicted Data Value')
plt.grid(True)
plt.legend()
plt.show()
Mean Squared Error for Obesity: 28.361608311875717
R-squared: 0.2556615957364562
The Mean Squared Error (MSE) for obesity using linear regression is approximately 28.362: on average, the squared difference between a predicted and actual value is about 28.4, which corresponds to a typical error of roughly 5.3 percentage points. A lower MSE indicates a better fit, so while this value isn't ideal, it gives us a starting point for evaluation. As this is the first model we tested, the result suggests the model performs reasonably but is probably not optimal.
Our R-squared value using linear regression is 0.256, which means that about 25.6% of the variance in the obesity data is explained by the model. R-squared has a maximum of 1, which would indicate that the model perfectly explains the variance in the target variable; a value of 0 means the model does no better than always predicting the mean of the target, and a negative value (R-squared is not bounded below) means the model performs worse than predicting the mean.
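To make these two numbers concrete, here is a short sketch that recomputes them directly from their definitions, using the `y_test` and `y_pred` arrays from the obesity model above, and then prints the intercept and coefficients that define the fitted line:

```python
import numpy as np

# MSE: the mean of the squared residuals
mse_manual = np.mean((y_test - y_pred) ** 2)

# R^2: 1 minus (residual sum of squares / total sum of squares)
ss_res = np.sum((y_test - y_pred) ** 2)
ss_tot = np.sum((y_test - y_test.mean()) ** 2)
r2_manual = 1 - ss_res / ss_tot

print(f"MSE (by hand): {mse_manual:.3f}, R^2 (by hand): {r2_manual:.3f}")

# The "best fitting line" itself: an intercept plus one coefficient per feature.
# Note the coefficients are in units of the standardized (scaled) features.
print("Intercept:", model.intercept_)
print("Coefficients:", dict(zip(target_features.columns, model.coef_)))
```

These hand-computed values should match the scikit-learn metrics printed above.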
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline
models = {
'Polynomial': make_pipeline(PolynomialFeatures(7), LinearRegression()),
'KNN': KNeighborsRegressor(n_neighbors=20),
'Decision Tree': DecisionTreeRegressor(random_state=42),
'Random Forest': RandomForestRegressor(random_state=42),
'Gradient Boosting': GradientBoostingRegressor(random_state=42),
'Linear Regression': LinearRegression()
}
Polynomial Regression: A type of linear regression that can curve the model to fit more complex patterns by adding polynomial features (higher powers and products of the input features). Essentially, it fits a curve to the data rather than just a straight line.
KNN: A simple model that makes predictions based on the data points closest to a given point. It looks at the "K" nearest neighbors of a new data point and takes their average (for regression) or their majority vote (for classification).
Decision Tree: Makes predictions by repeatedly splitting the data into smaller groups based on conditions (like answering yes/no questions) and predicting from the group a data point falls into. It works like a flowchart, where each decision leads down a path to a final prediction.
Random Forest: Essentially multiple decision trees working together; the predictions of all the trees are averaged to get a final prediction that is usually more accurate than a single tree (see the small sketch after these descriptions).
Gradient Boosting: Similar to Random Forest in that it combines many trees, but instead of averaging independent trees, it builds them one by one, with each new tree improving on the errors of the previous ones.
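To make the Random Forest idea concrete, here is a rough sketch of "many trees averaged". It is a simplification (scikit-learn's implementation also randomizes which features are considered at each split), but it shows the core mechanism using the obesity training split from above:

```python
import numpy as np
from sklearn.tree import DecisionTreeRegressor

rng = np.random.default_rng(42)
n_trees = 10
tree_predictions = []

for _ in range(n_trees):
    # Bootstrap sample: draw training rows with replacement
    idx = rng.integers(0, len(X_train_scaled), size=len(X_train_scaled))
    tree = DecisionTreeRegressor(random_state=42)
    tree.fit(X_train_scaled[idx], y_train.iloc[idx])
    tree_predictions.append(tree.predict(X_test_scaled))

# The "forest" prediction is simply the average of the individual trees' predictions
forest_pred = np.mean(tree_predictions, axis=0)
print("Hand-rolled forest MSE:", mean_squared_error(y_test, forest_pred))
```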
# Fit each candidate model on the obesity training data and compare the results
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"Mean Squared Error for {model_name}: {mse}")
    print(f"R-squared: {r2}")

    # Plot predicted vs. actual values for this model
    plt.scatter(y_test, y_pred, color='blue', alpha=0.5, label="Predicted Values")
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linewidth=2, label="Ideal Prediction")
    plt.title(f"({model_name}) Obesity - Actual vs Predicted")
    plt.xlabel('Actual Data Value')
    plt.ylabel('Predicted Data Value')
    plt.grid(True)
    plt.legend()
    plt.show()
Mean Squared Error for Polynomial: 22.50360691296797
R-squared: 0.4094023626735278
Mean Squared Error for KNN: 23.434549173987705
R-squared: 0.3849701771144749
Mean Squared Error for Decision Tree: 22.36625415404215
R-squared: 0.4130071276881139
Mean Squared Error for Random Forest: 22.248140042551135
R-squared: 0.41610698254477285
Mean Squared Error for Gradient Boosting: 22.79475606241995
R-squared: 0.40176127649383364
Mean Squared Error for Linear Regression: 28.361608311875717
R-squared: 0.2556615957364562
The above plots show the predicted values of six different models: Polynomial Regression, KNN, Decision Tree, Random Forest, Gradient Boosting, and Linear Regression. Their scores are, respectively: Polynomial Regression (MSE ~22.504, R^2 ~0.409), KNN (MSE ~23.435, R^2 ~0.385), Decision Tree (MSE ~22.366, R^2 ~0.413), Random Forest (MSE ~22.248, R^2 ~0.416), Gradient Boosting (MSE ~22.795, R^2 ~0.402), and Linear Regression (MSE ~28.362, R^2 ~0.256). We are looking for the model with the lowest MSE and the highest R^2 score, and by both measures the Random Forest regressor was the most accurate.
The red lines in the graphs represent the ideal relationship between predicted and actual values; if every predicted value fell exactly on this line, the model's predictions would be perfect.
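As an optional extra check (a sketch beyond the single train/test split used above, with an assumed 5-fold setup), cross-validation can help confirm that Random Forest's advantage is not an artifact of one particular split:

```python
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor

# 5-fold cross-validated R^2 on the obesity training data for the best model
cv_scores = cross_val_score(
    RandomForestRegressor(random_state=42),
    X_train_scaled, y_train,
    cv=5, scoring='r2',
)
print(f"Mean CV R^2: {cv_scores.mean():.3f} (+/- {cv_scores.std():.3f})")
```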
Insights and Conclusion
Throughout the tutorial, we have shown how the gathered data can be used to predict a population's level of obesity given its features. For example, if we are looking at a population of people who are Hispanic, 18-24 years old, college educated, and high income, we can estimate their level of obesity.
race_val = race_dict['Hispanic']
age_val = age_dict['18 - 24']
income_val = income_dict['$75,000 or greater']
education_val = education_dict['College graduate']
obesity_percentage = models['Random Forest'].predict([[education_val, income_val, age_val, race_val]])
print(obesity_percentage)
[18.20874881]
This prediction is the estimated percentage of obesity within the population described by the given categories.
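Note that the Random Forest above was trained on standardized features, so to keep a query in the same feature space it can be passed through the fitted scaler first. A minimal sketch of that adjustment (using the same `scaler` fit on the obesity training data):

```python
import pandas as pd

# Build the query with the same column names used during training,
# then standardize it with the fitted scaler before predicting.
query = pd.DataFrame(
    [[education_val, income_val, age_val, race_val]],
    columns=['Education', 'Income', 'Age(years)', 'Race/Ethnicity'],
)
obesity_percentage = models['Random Forest'].predict(scaler.transform(query))
print(obesity_percentage)
```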
In addition, through our hypothesis testing we were able to determine which features were most significant in assessing obesity rate, most notably income and education. We also found that, contrary to our initial hypothesis, the density of fast food locations does NOT have a significant influence on obesity rate.
Unfortunately, despite testing several different models, we could not find one with an MSE better than about 22.25. The value we are predicting, the projected obesity percentage of a population given its age, income, and other features, is continuous, so it is difficult to cleanly classify a population as obese, not obese, or somewhere in between. At the same time, because the data points are scattered so close together, it is also difficult for a regressor to find a clear trend. Thus, we believe Random Forest gives the best obesity rate predictions, but some error will remain.
Through our tutorial, the average reader will first be introduced to our dataset and why we selected it for our goal of predicting obesity rate. They will then be guided through our cleaning of the data: deleting redundant or irrelevant features, understanding which values are most relevant to our goal, and deciding which missing values need to be estimated or simply dropped.
Next, the reader will be introduced to our hypothesis testing, which offers some preliminary insights into the data: testing whether the distribution of obesity across different levels of education is the same using the Chi-Squared test, searching for a linear relationship between fast food restaurant count and obesity rate, and using the ANOVA test and Tukey's HSD to identify the connection between lower incomes and higher obesity rates.
Finally, the reader will be guided through our machine learning findings. After further data manipulation to turn the categorical data into numerical values and estimate missing values with KNN imputation, we introduce Linear Regression to see whether it can reliably predict the continuous obesity rate. We then present several other models, explaining what each does and showing its results, to see which predicts the obesity rate best. After all of this testing, the reader will understand that Random Forest had the lowest error for obesity rate predictions, and can see an example prediction given several key features.
We made sure to describe each step of the tutorial clearly, with code demonstrations showing our thought process throughout the analysis. This, together with our descriptions of each model and hypothesis test, will allow an uninformed reader to use our Colab to understand how the obesity rate of a population can be predicted, as well as which categories were most influential in determining it. In addition, given our extensive data cleaning, explanation of which data matters most, and use of several models, we believe a more advanced reader will also learn more about the topic.