Replies: 2 comments 4 replies
-
Hello @asghar13, just to be sure I have understood your question: what you need is the probability of each sample belonging to one of the latent classes? You can get the estimated values with the .get_latent_classes_weights() method that you can find here. We are planning to add the possibility of estimating this probability from features, but it is not implemented yet. |
Beta Was this translation helpful? Give feedback.
-
Hello, |
Beta Was this translation helpful? Give feedback.
-
Hello everyone,
I have a few questions regarding obtaining the output after running the
LatentClassConditionalLogit
model. You can see my code at the end of this message. Specifically, I need the following output: individual-level class assignments (individual_level_class_assignments.csv).
I would be very grateful for any assistance in solving this issue or for any documentation that could help me better understand how to use the code for this purpose.
Thank you very much for your support in advance.
''
Code for reading the data:
''
Define the columns to retain for the analysis
kept_columns = [
"ID", "Gender", "Age", "Education", "Occup", # Demographic and individual information
"Gas_Capital", "Gas_Annual", "Gas_Emission", "Gas_Work", # Attributes of "Gas"
"Electric_Capital", "Electric_Annual", "Electric_Emission", "Electric_Work", # Attributes of "Electric"
"Heatpump_Capital", "Heatpump_Annual", "Heatpump_Emission", "Heatpump_Work", # Attributes of "Heatpump"
"Solid_Capital", "Solid_Annual", "Solid_Emission", "Solid_Work", # Attributes of "Solid fuel"
"Gas_Av", "Electric_Av", "Heatpump_Av", "Solid_Av", # Availability indicators
"Choice", "Card" # User choices and choice cards
]
Filter the dataset to include only the defined columns
crheating_df = data[kept_columns]
Display a preview of the filtered dataset
print("Filtered dataset:")
print(crheating_df.head())
Display the unique values in the 'Choice' column (to verify mapping)
print("Unique values in 'Choice' column:")
print(crheating_df["Choice"].unique())
Map categorical choices to integers for modeling
choice_mapping = {
"Gas": 0,
"Electric": 1,
"Heatpump": 2,
"Solid": 3
}
crheating_df["Choice"] = crheating_df["Choice"].map(choice_mapping)
Ensure the 'Choice' column is of integer type
crheating_df["Choice"] = crheating_df["Choice"].astype(int)
Convert the dataset into the required format for choice modeling
from choice_learn.data import ChoiceDataset
dataset = ChoiceDataset.from_single_wide_df(
df=crheating_df, # Filtered dataset
items_id=["Gas", "Electric", "Heatpump", "Solid"], # Names of the alternatives
choices_column="Choice", # Column representing user choices
choice_format="items_index", # Encoding format for choices
shared_features_columns=["Gender", "Age", "Education", "Occup"], # Shared features across choices
items_features_suffixes=["Capital", "Annual", "Emission", "Work"], # Attributes for each item
available_items_suffix="Av", # Suffix for availability indicators
delimiter="_", # Delimiter used in column names
)
''
code for LatentClassConditionalLogit:
''
Ensure all numerical columns are explicitly converted to float32
numerical_columns = [
"Gender", "Age", "Education", "Occup", # Shared features
"Gas_Capital", "Gas_Annual", "Gas_Emission", "Gas_Work", # Item-specific features for Gas
"Electric_Capital", "Electric_Annual", "Electric_Emission", "Electric_Work", # Electric
"Heatpump_Capital", "Heatpump_Annual", "Heatpump_Emission", "Heatpump_Work", # Heatpump
"Solid_Capital", "Solid_Annual", "Solid_Emission", "Solid_Work" # Solid fuel
]
Convert these columns to float32
crheating_df[numerical_columns] = crheating_df[numerical_columns].astype("float32")
Convert Choice column to integer (if not already)
crheating_df["Choice"] = crheating_df["Choice"].astype("int32")
Recreate the ChoiceDataset with correctly typed columns
dataset = ChoiceDataset.from_single_wide_df(
df=crheating_df,
items_id=["Gas", "Electric", "Heatpump", "Solid"], # Names of the alternatives
choices_column="Choice", # Column representing user choices
choice_format="items_index", # Encoding format for choices
shared_features_columns=["Gender", "Age", "Education", "Occup"], # Shared features across choices
items_features_suffixes=["Capital", "Annual", "Emission", "Work"], # Attributes for each item
available_items_suffix="Av", # Suffix for availability indicators
delimiter="_", # Delimiter used in column names
)
Define and fit the model
from choice_learn.models.latent_class_mnl import LatentClassConditionalLogit
Initialize the model
lc_model_2 = LatentClassConditionalLogit(
n_latent_classes=3, # Number of latent classes
fit_method="mle", # Maximum Likelihood Estimation
optimizer="lbfgs", # Optimizer
epochs=1000, # Number of epochs
lbfgs_tolerance=1e-20 # Tolerance for convergence
)
Add shared coefficients for item-specific features
lc_model_2.add_shared_coefficient(coefficient_name="Capital", feature_name="Capital", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Annual", feature_name="Annual", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Emission", feature_name="Emission", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Work", feature_name="Work", items_indexes=[0, 1, 2, 3])
Add shared coefficients for demographic/shared features
lc_model_2.add_shared_coefficient(coefficient_name="Gender", feature_name="Gender", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Age", feature_name="Age", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Education", feature_name="Education", items_indexes=[0, 1, 2, 3])
lc_model_2.add_shared_coefficient(coefficient_name="Occup", feature_name="Occup", items_indexes=[0, 1, 2, 3])
Fit the model to the dataset
hist2 = lc_model_2.fit(dataset, verbose=1)
Print Latent Class Model results
print("Latent Class Model weights:")
print("Classes Logits:", lc_model_2.latent_logits)
for i in range(3): # Assuming 3 latent classes
print("\n")
print(f"Model Nb {i}, weights:", lc_model_2.models[i].trainable_weights)
Evaluate the model's Negative Log-Likelihood (NLL)
nll_2 = lc_model_2.evaluate(dataset) * len(dataset)
print(f"Negative Log-Likelihood: {nll_2}")
Generate structured output as a DataFrame for further analysis
report_data = []
for class_idx, model in enumerate(lc_model_2.models):
class_weights = model.trainable_weights
for weight, feature_name in zip(class_weights, ["Capital", "Annual", "Emission", "Work", "Gender", "Age", "Education", "Occup"]):
coef_estimation = weight.numpy().flatten()[0]
report_data.append({
"Latent Class": class_idx + 1,
"Feature": feature_name,
"Coefficient": coef_estimation
})
Convert the results into a DataFrame
report_df = pd.DataFrame(report_data)
Save the report to a file
output_path = r"C:\Users\mohamma11\Downloads\latent_class_conditional_logit_report.csv"
report_df.to_csv(output_path, index=False)
print(f"Report saved to {output_path}")
''
Code for assigning IDs to classes:
''
Predict the probabilities for each individual for each class
individual_probabilities = lc_model_2.predict_probas(dataset).numpy() # Ensure this is a 2D array
Verify the shape of the probabilities array
print(f"Shape of individual_probabilities: {individual_probabilities.shape}")
Assign each individual to the class with the highest probability
assigned_classes = np.argmax(individual_probabilities, axis=1)
Create a DataFrame to store the results
individual_ids = crheating_df["ID"].values # Assuming the dataset has an "ID" column
results_df = pd.DataFrame({
"ID": individual_ids,
"Assigned Class": assigned_classes + 1, # Adding 1 to match class numbering (1-based)
})
Include probabilities for each class
for i in range(individual_probabilities.shape[1]):
results_df[f"Class {i + 1} Probability"] = individual_probabilities[:, i]
''
individual_level_class_assignments (1).csv
Beta Was this translation helpful? Give feedback.
All reactions